blob: af7661eb0c7789d3dcb2bdd6937d1accb2548890 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */

/* -------------------------------------------------------------------- */

/* allocation tracing hooks.  change "#if 0" to "#if 1" to keep a
   running byte count in 'memory' and print it on every ALLOC/RELEASE;
   in the default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local function
   returning 'type'; under MSVC it additionally requests inlining and
   the __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif

/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        yields the lowest bit of the pointer (the flag)
   JOIN_SET(p, flag)  yields p with its flag bit cleared and then
                      or-ed with 'flag'
   JOIN_OBJ(p)        yields p with the flag bit cleared, i.e. the
                      plain PyObject pointer */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))

/* glue functions (see the init function for details).  the helpers in
   this file test some of these pointers for NULL before calling them
   and raise RuntimeError if the helper is missing. */
static PyObject* elementtree_parseerror_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_iter_obj;
static PyObject* elementtree_itertext_obj;
static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
194/* the element type */
195
/* optional part of an element: the attributes and the child array.
   elements reference it through ElementObject.extra, which is NULL
   until the block has been allocated. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
211
/* the element object itself.  attributes and children live in the
   separately allocated 'extra' block. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL if not allocated (see
       element_new_extra) */
    ElementObjectExtra* extra;

} ElementObject;
233
/* the element type object (only declared at this point) */
static PyTypeObject Element_Type;

/* true if the type of op is exactly Element_Type; the comparison is by
   type pointer identity, so instances of other types never match */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
239/* element constructor and destructor */
240
241LOCAL(int)
242element_new_extra(ElementObject* self, PyObject* attrib)
243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
262element_dealloc_extra(ElementObject* self)
263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
277LOCAL(PyObject*)
278element_new(PyObject* tag, PyObject* attrib)
279{
280 ElementObject* self;
281
282 self = PyObject_New(ElementObject, &Element_Type);
283 if (self == NULL)
284 return NULL;
285
286 /* use None for empty dictionaries */
287 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
288 attrib = Py_None;
289
290 self->extra = NULL;
291
292 if (attrib != Py_None) {
293
Thomas Wouters477c8d52006-05-27 19:21:47 +0000294 if (element_new_extra(self, attrib) < 0) {
295 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000297 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298
299 self->extra->length = 0;
300 self->extra->allocated = STATIC_CHILDREN;
301 self->extra->children = self->extra->_children;
302
303 }
304
305 Py_INCREF(tag);
306 self->tag = tag;
307
308 Py_INCREF(Py_None);
309 self->text = Py_None;
310
311 Py_INCREF(Py_None);
312 self->tail = Py_None;
313
314 ALLOC(sizeof(ElementObject), "create element");
315
316 return (PyObject*) self;
317}
318
319LOCAL(int)
320element_resize(ElementObject* self, int extra)
321{
322 int size;
323 PyObject* *children;
324
325 /* make sure self->children can hold the given number of extra
326 elements. set an exception and return -1 if allocation failed */
327
328 if (!self->extra)
329 element_new_extra(self, NULL);
330
331 size = self->extra->length + extra;
332
333 if (size > self->extra->allocated) {
334 /* use Python 2.4's list growth strategy */
335 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000336 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100337 * which needs at least 4 bytes.
338 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000339 * be safe.
340 */
341 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000342 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000343 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100344 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000345 * false alarm always assume at least one child to be safe.
346 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000347 children = PyObject_Realloc(self->extra->children,
348 size * sizeof(PyObject*));
349 if (!children)
350 goto nomemory;
351 } else {
352 children = PyObject_Malloc(size * sizeof(PyObject*));
353 if (!children)
354 goto nomemory;
355 /* copy existing children from static area to malloc buffer */
356 memcpy(children, self->extra->children,
357 self->extra->length * sizeof(PyObject*));
358 }
359 self->extra->children = children;
360 self->extra->allocated = size;
361 }
362
363 return 0;
364
365 nomemory:
366 PyErr_NoMemory();
367 return -1;
368}
369
370LOCAL(int)
371element_add_subelement(ElementObject* self, PyObject* element)
372{
373 /* add a child element to a parent */
374
375 if (element_resize(self, 1) < 0)
376 return -1;
377
378 Py_INCREF(element);
379 self->extra->children[self->extra->length] = element;
380
381 self->extra->length++;
382
383 return 0;
384}
385
386LOCAL(PyObject*)
387element_get_attrib(ElementObject* self)
388{
389 /* return borrowed reference to attrib dictionary */
390 /* note: this function assumes that the extra section exists */
391
392 PyObject* res = self->extra->attrib;
393
394 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000395 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396 /* create missing dictionary */
397 res = PyDict_New();
398 if (!res)
399 return NULL;
400 self->extra->attrib = res;
401 }
402
403 return res;
404}
405
406LOCAL(PyObject*)
407element_get_text(ElementObject* self)
408{
409 /* return borrowed reference to text attribute */
410
411 PyObject* res = self->text;
412
413 if (JOIN_GET(res)) {
414 res = JOIN_OBJ(res);
415 if (PyList_CheckExact(res)) {
416 res = list_join(res);
417 if (!res)
418 return NULL;
419 self->text = res;
420 }
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_tail(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->tail;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->tail = res;
440 }
441 }
442
443 return res;
444}
445
446static PyObject*
447element(PyObject* self, PyObject* args, PyObject* kw)
448{
449 PyObject* elem;
450
451 PyObject* tag;
452 PyObject* attrib = NULL;
453 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
454 &PyDict_Type, &attrib))
455 return NULL;
456
457 if (attrib || kw) {
458 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
459 if (!attrib)
460 return NULL;
461 if (kw)
462 PyDict_Update(attrib, kw);
463 } else {
464 Py_INCREF(Py_None);
465 attrib = Py_None;
466 }
467
468 elem = element_new(tag, attrib);
469
470 Py_DECREF(attrib);
471
472 return elem;
473}
474
475static PyObject*
476subelement(PyObject* self, PyObject* args, PyObject* kw)
477{
478 PyObject* elem;
479
480 ElementObject* parent;
481 PyObject* tag;
482 PyObject* attrib = NULL;
483 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
484 &Element_Type, &parent, &tag,
485 &PyDict_Type, &attrib))
486 return NULL;
487
488 if (attrib || kw) {
489 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
490 if (!attrib)
491 return NULL;
492 if (kw)
493 PyDict_Update(attrib, kw);
494 } else {
495 Py_INCREF(Py_None);
496 attrib = Py_None;
497 }
498
499 elem = element_new(tag, attrib);
500
501 Py_DECREF(attrib);
502
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000503 if (element_add_subelement(parent, elem) < 0) {
504 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000505 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000506 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000507
508 return elem;
509}
510
511static void
512element_dealloc(ElementObject* self)
513{
514 if (self->extra)
515 element_dealloc_extra(self);
516
517 /* discard attributes */
518 Py_DECREF(self->tag);
519 Py_DECREF(JOIN_OBJ(self->text));
520 Py_DECREF(JOIN_OBJ(self->tail));
521
522 RELEASE(sizeof(ElementObject), "destroy element");
523
524 PyObject_Del(self);
525}
526
527/* -------------------------------------------------------------------- */
528/* methods (in alphabetical order) */
529
530static PyObject*
531element_append(ElementObject* self, PyObject* args)
532{
533 PyObject* element;
534 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
535 return NULL;
536
537 if (element_add_subelement(self, element) < 0)
538 return NULL;
539
540 Py_RETURN_NONE;
541}
542
543static PyObject*
544element_clear(ElementObject* self, PyObject* args)
545{
546 if (!PyArg_ParseTuple(args, ":clear"))
547 return NULL;
548
549 if (self->extra) {
550 element_dealloc_extra(self);
551 self->extra = NULL;
552 }
553
554 Py_INCREF(Py_None);
555 Py_DECREF(JOIN_OBJ(self->text));
556 self->text = Py_None;
557
558 Py_INCREF(Py_None);
559 Py_DECREF(JOIN_OBJ(self->tail));
560 self->tail = Py_None;
561
562 Py_RETURN_NONE;
563}
564
565static PyObject*
566element_copy(ElementObject* self, PyObject* args)
567{
568 int i;
569 ElementObject* element;
570
571 if (!PyArg_ParseTuple(args, ":__copy__"))
572 return NULL;
573
574 element = (ElementObject*) element_new(
575 self->tag, (self->extra) ? self->extra->attrib : Py_None
576 );
577 if (!element)
578 return NULL;
579
580 Py_DECREF(JOIN_OBJ(element->text));
581 element->text = self->text;
582 Py_INCREF(JOIN_OBJ(element->text));
583
584 Py_DECREF(JOIN_OBJ(element->tail));
585 element->tail = self->tail;
586 Py_INCREF(JOIN_OBJ(element->tail));
587
588 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100589
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000590 if (element_resize(element, self->extra->length) < 0) {
591 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000593 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594
595 for (i = 0; i < self->extra->length; i++) {
596 Py_INCREF(self->extra->children[i]);
597 element->extra->children[i] = self->extra->children[i];
598 }
599
600 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100601
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 }
603
604 return (PyObject*) element;
605}
606
607static PyObject*
608element_deepcopy(ElementObject* self, PyObject* args)
609{
610 int i;
611 ElementObject* element;
612 PyObject* tag;
613 PyObject* attrib;
614 PyObject* text;
615 PyObject* tail;
616 PyObject* id;
617
618 PyObject* memo;
619 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
620 return NULL;
621
622 tag = deepcopy(self->tag, memo);
623 if (!tag)
624 return NULL;
625
626 if (self->extra) {
627 attrib = deepcopy(self->extra->attrib, memo);
628 if (!attrib) {
629 Py_DECREF(tag);
630 return NULL;
631 }
632 } else {
633 Py_INCREF(Py_None);
634 attrib = Py_None;
635 }
636
637 element = (ElementObject*) element_new(tag, attrib);
638
639 Py_DECREF(tag);
640 Py_DECREF(attrib);
641
642 if (!element)
643 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100644
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645 text = deepcopy(JOIN_OBJ(self->text), memo);
646 if (!text)
647 goto error;
648 Py_DECREF(element->text);
649 element->text = JOIN_SET(text, JOIN_GET(self->text));
650
651 tail = deepcopy(JOIN_OBJ(self->tail), memo);
652 if (!tail)
653 goto error;
654 Py_DECREF(element->tail);
655 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
656
657 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100658
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000659 if (element_resize(element, self->extra->length) < 0)
660 goto error;
661
662 for (i = 0; i < self->extra->length; i++) {
663 PyObject* child = deepcopy(self->extra->children[i], memo);
664 if (!child) {
665 element->extra->length = i;
666 goto error;
667 }
668 element->extra->children[i] = child;
669 }
670
671 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100672
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673 }
674
675 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000676 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000677 if (!id)
678 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679
680 i = PyDict_SetItem(memo, id, (PyObject*) element);
681
682 Py_DECREF(id);
683
684 if (i < 0)
685 goto error;
686
687 return (PyObject*) element;
688
689 error:
690 Py_DECREF(element);
691 return NULL;
692}
693
694LOCAL(int)
695checkpath(PyObject* tag)
696{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000697 Py_ssize_t i;
698 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 /* check if a tag contains an xpath character */
701
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000702#define PATHCHAR(ch) \
703 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000705 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
707 void *data = PyUnicode_DATA(tag);
708 unsigned int kind = PyUnicode_KIND(tag);
709 for (i = 0; i < len; i++) {
710 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
711 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200713 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200715 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return 1;
717 }
718 return 0;
719 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000720 if (PyBytes_Check(tag)) {
721 char *p = PyBytes_AS_STRING(tag);
722 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (p[i] == '{')
724 check = 0;
725 else if (p[i] == '}')
726 check = 1;
727 else if (check && PATHCHAR(p[i]))
728 return 1;
729 }
730 return 0;
731 }
732
733 return 1; /* unknown type; might be path expression */
734}
735
736static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000737element_extend(ElementObject* self, PyObject* args)
738{
739 PyObject* seq;
740 Py_ssize_t i, seqlen = 0;
741
742 PyObject* seq_in;
743 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
744 return NULL;
745
746 seq = PySequence_Fast(seq_in, "");
747 if (!seq) {
748 PyErr_Format(
749 PyExc_TypeError,
750 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
751 );
752 return NULL;
753 }
754
755 seqlen = PySequence_Size(seq);
756 for (i = 0; i < seqlen; i++) {
757 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
758 if (element_add_subelement(self, element) < 0) {
759 Py_DECREF(seq);
760 return NULL;
761 }
762 }
763
764 Py_DECREF(seq);
765
766 Py_RETURN_NONE;
767}
768
769static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000770element_find(ElementObject* self, PyObject* args)
771{
772 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000774 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200775
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000776 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000777 return NULL;
778
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200779 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200780 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200781 return _PyObject_CallMethodId(
782 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200784 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785
786 if (!self->extra)
787 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100788
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* item = self->extra->children[i];
791 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000792 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793 Py_INCREF(item);
794 return item;
795 }
796 }
797
798 Py_RETURN_NONE;
799}
800
801static PyObject*
802element_findtext(ElementObject* self, PyObject* args)
803{
804 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 PyObject* tag;
806 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000807 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200808 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200809
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000810 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811 return NULL;
812
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000813 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200814 return _PyObject_CallMethodId(
815 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 );
817
818 if (!self->extra) {
819 Py_INCREF(default_value);
820 return default_value;
821 }
822
823 for (i = 0; i < self->extra->length; i++) {
824 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000825 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
826
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827 PyObject* text = element_get_text(item);
828 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000829 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000830 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 return text;
832 }
833 }
834
835 Py_INCREF(default_value);
836 return default_value;
837}
838
839static PyObject*
840element_findall(ElementObject* self, PyObject* args)
841{
842 int i;
843 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000845 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200846
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000847 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000848 return NULL;
849
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200850 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200851 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200852 return _PyObject_CallMethodId(
853 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000854 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200855 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000856
857 out = PyList_New(0);
858 if (!out)
859 return NULL;
860
861 if (!self->extra)
862 return out;
863
864 for (i = 0; i < self->extra->length; i++) {
865 PyObject* item = self->extra->children[i];
866 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000867 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 if (PyList_Append(out, item) < 0) {
869 Py_DECREF(out);
870 return NULL;
871 }
872 }
873 }
874
875 return out;
876}
877
878static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000879element_iterfind(ElementObject* self, PyObject* args)
880{
881 PyObject* tag;
882 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200883 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200884
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000885 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
886 return NULL;
887
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200888 return _PyObject_CallMethodId(
889 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 );
891}
892
893static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000894element_get(ElementObject* self, PyObject* args)
895{
896 PyObject* value;
897
898 PyObject* key;
899 PyObject* default_value = Py_None;
900 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
901 return NULL;
902
903 if (!self->extra || self->extra->attrib == Py_None)
904 value = default_value;
905 else {
906 value = PyDict_GetItem(self->extra->attrib, key);
907 if (!value)
908 value = default_value;
909 }
910
911 Py_INCREF(value);
912 return value;
913}
914
915static PyObject*
916element_getchildren(ElementObject* self, PyObject* args)
917{
918 int i;
919 PyObject* list;
920
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000921 /* FIXME: report as deprecated? */
922
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000923 if (!PyArg_ParseTuple(args, ":getchildren"))
924 return NULL;
925
926 if (!self->extra)
927 return PyList_New(0);
928
929 list = PyList_New(self->extra->length);
930 if (!list)
931 return NULL;
932
933 for (i = 0; i < self->extra->length; i++) {
934 PyObject* item = self->extra->children[i];
935 Py_INCREF(item);
936 PyList_SET_ITEM(list, i, item);
937 }
938
939 return list;
940}
941
942static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000943element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000944{
945 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100946
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000948 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000949 return NULL;
950
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000951 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952 PyErr_SetString(
953 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000954 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000955 );
956 return NULL;
957 }
958
959 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000960 if (!args)
961 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000962
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
964 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
965
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000966 result = PyObject_CallObject(elementtree_iter_obj, args);
967
968 Py_DECREF(args);
969
970 return result;
971}
972
973
974static PyObject*
975element_itertext(ElementObject* self, PyObject* args)
976{
977 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100978
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000979 if (!PyArg_ParseTuple(args, ":itertext"))
980 return NULL;
981
982 if (!elementtree_itertext_obj) {
983 PyErr_SetString(
984 PyExc_RuntimeError,
985 "itertext helper not found"
986 );
987 return NULL;
988 }
989
990 args = PyTuple_New(1);
991 if (!args)
992 return NULL;
993
994 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
995
996 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997
998 Py_DECREF(args);
999
1000 return result;
1001}
1002
1003static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001004element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001006 ElementObject* self = (ElementObject*) self_;
1007
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 if (!self->extra || index < 0 || index >= self->extra->length) {
1009 PyErr_SetString(
1010 PyExc_IndexError,
1011 "child index out of range"
1012 );
1013 return NULL;
1014 }
1015
1016 Py_INCREF(self->extra->children[index]);
1017 return self->extra->children[index];
1018}
1019
1020static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001021element_insert(ElementObject* self, PyObject* args)
1022{
1023 int i;
1024
1025 int index;
1026 PyObject* element;
1027 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1028 &Element_Type, &element))
1029 return NULL;
1030
1031 if (!self->extra)
1032 element_new_extra(self, NULL);
1033
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001034 if (index < 0) {
1035 index += self->extra->length;
1036 if (index < 0)
1037 index = 0;
1038 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001039 if (index > self->extra->length)
1040 index = self->extra->length;
1041
1042 if (element_resize(self, 1) < 0)
1043 return NULL;
1044
1045 for (i = self->extra->length; i > index; i--)
1046 self->extra->children[i] = self->extra->children[i-1];
1047
1048 Py_INCREF(element);
1049 self->extra->children[index] = element;
1050
1051 self->extra->length++;
1052
1053 Py_RETURN_NONE;
1054}
1055
1056static PyObject*
1057element_items(ElementObject* self, PyObject* args)
1058{
1059 if (!PyArg_ParseTuple(args, ":items"))
1060 return NULL;
1061
1062 if (!self->extra || self->extra->attrib == Py_None)
1063 return PyList_New(0);
1064
1065 return PyDict_Items(self->extra->attrib);
1066}
1067
1068static PyObject*
1069element_keys(ElementObject* self, PyObject* args)
1070{
1071 if (!PyArg_ParseTuple(args, ":keys"))
1072 return NULL;
1073
1074 if (!self->extra || self->extra->attrib == Py_None)
1075 return PyList_New(0);
1076
1077 return PyDict_Keys(self->extra->attrib);
1078}
1079
Martin v. Löwis18e16552006-02-15 17:27:45 +00001080static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081element_length(ElementObject* self)
1082{
1083 if (!self->extra)
1084 return 0;
1085
1086 return self->extra->length;
1087}
1088
1089static PyObject*
1090element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1091{
1092 PyObject* elem;
1093
1094 PyObject* tag;
1095 PyObject* attrib;
1096 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1097 return NULL;
1098
1099 attrib = PyDict_Copy(attrib);
1100 if (!attrib)
1101 return NULL;
1102
1103 elem = element_new(tag, attrib);
1104
1105 Py_DECREF(attrib);
1106
1107 return elem;
1108}
1109
1110static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111element_remove(ElementObject* self, PyObject* args)
1112{
1113 int i;
1114
1115 PyObject* element;
1116 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1117 return NULL;
1118
1119 if (!self->extra) {
1120 /* element has no children, so raise exception */
1121 PyErr_SetString(
1122 PyExc_ValueError,
1123 "list.remove(x): x not in list"
1124 );
1125 return NULL;
1126 }
1127
1128 for (i = 0; i < self->extra->length; i++) {
1129 if (self->extra->children[i] == element)
1130 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001131 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 break;
1133 }
1134
1135 if (i == self->extra->length) {
1136 /* element is not in children, so raise exception */
1137 PyErr_SetString(
1138 PyExc_ValueError,
1139 "list.remove(x): x not in list"
1140 );
1141 return NULL;
1142 }
1143
1144 Py_DECREF(self->extra->children[i]);
1145
1146 self->extra->length--;
1147
1148 for (; i < self->extra->length; i++)
1149 self->extra->children[i] = self->extra->children[i+1];
1150
1151 Py_RETURN_NONE;
1152}
1153
1154static PyObject*
1155element_repr(ElementObject* self)
1156{
Walter Dörwald7569dfe2007-05-19 21:49:49 +00001157 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158}
1159
1160static PyObject*
1161element_set(ElementObject* self, PyObject* args)
1162{
1163 PyObject* attrib;
1164
1165 PyObject* key;
1166 PyObject* value;
1167 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1168 return NULL;
1169
1170 if (!self->extra)
1171 element_new_extra(self, NULL);
1172
1173 attrib = element_get_attrib(self);
1174 if (!attrib)
1175 return NULL;
1176
1177 if (PyDict_SetItem(attrib, key, value) < 0)
1178 return NULL;
1179
1180 Py_RETURN_NONE;
1181}
1182
1183static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001184element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001186 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001187 int i;
1188 PyObject* old;
1189
1190 if (!self->extra || index < 0 || index >= self->extra->length) {
1191 PyErr_SetString(
1192 PyExc_IndexError,
1193 "child assignment index out of range");
1194 return -1;
1195 }
1196
1197 old = self->extra->children[index];
1198
1199 if (item) {
1200 Py_INCREF(item);
1201 self->extra->children[index] = item;
1202 } else {
1203 self->extra->length--;
1204 for (i = index; i < self->extra->length; i++)
1205 self->extra->children[i] = self->extra->children[i+1];
1206 }
1207
1208 Py_DECREF(old);
1209
1210 return 0;
1211}
1212
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001213static PyObject*
1214element_subscr(PyObject* self_, PyObject* item)
1215{
1216 ElementObject* self = (ElementObject*) self_;
1217
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001218 if (PyIndex_Check(item)) {
1219 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001220
1221 if (i == -1 && PyErr_Occurred()) {
1222 return NULL;
1223 }
1224 if (i < 0 && self->extra)
1225 i += self->extra->length;
1226 return element_getitem(self_, i);
1227 }
1228 else if (PySlice_Check(item)) {
1229 Py_ssize_t start, stop, step, slicelen, cur, i;
1230 PyObject* list;
1231
1232 if (!self->extra)
1233 return PyList_New(0);
1234
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001235 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001236 self->extra->length,
1237 &start, &stop, &step, &slicelen) < 0) {
1238 return NULL;
1239 }
1240
1241 if (slicelen <= 0)
1242 return PyList_New(0);
1243 else {
1244 list = PyList_New(slicelen);
1245 if (!list)
1246 return NULL;
1247
1248 for (cur = start, i = 0; i < slicelen;
1249 cur += step, i++) {
1250 PyObject* item = self->extra->children[cur];
1251 Py_INCREF(item);
1252 PyList_SET_ITEM(list, i, item);
1253 }
1254
1255 return list;
1256 }
1257 }
1258 else {
1259 PyErr_SetString(PyExc_TypeError,
1260 "element indices must be integers");
1261 return NULL;
1262 }
1263}
1264
1265static int
1266element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1267{
1268 ElementObject* self = (ElementObject*) self_;
1269
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001270 if (PyIndex_Check(item)) {
1271 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001272
1273 if (i == -1 && PyErr_Occurred()) {
1274 return -1;
1275 }
1276 if (i < 0 && self->extra)
1277 i += self->extra->length;
1278 return element_setitem(self_, i, value);
1279 }
1280 else if (PySlice_Check(item)) {
1281 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1282
1283 PyObject* recycle = NULL;
1284 PyObject* seq = NULL;
1285
1286 if (!self->extra)
1287 element_new_extra(self, NULL);
1288
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001289 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001290 self->extra->length,
1291 &start, &stop, &step, &slicelen) < 0) {
1292 return -1;
1293 }
1294
1295 if (value == NULL)
1296 newlen = 0;
1297 else {
1298 seq = PySequence_Fast(value, "");
1299 if (!seq) {
1300 PyErr_Format(
1301 PyExc_TypeError,
1302 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1303 );
1304 return -1;
1305 }
1306 newlen = PySequence_Size(seq);
1307 }
1308
1309 if (step != 1 && newlen != slicelen)
1310 {
1311 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001312 "attempt to assign sequence of size %zd "
1313 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001314 newlen, slicelen
1315 );
1316 return -1;
1317 }
1318
1319
1320 /* Resize before creating the recycle bin, to prevent refleaks. */
1321 if (newlen > slicelen) {
1322 if (element_resize(self, newlen - slicelen) < 0) {
1323 if (seq) {
1324 Py_DECREF(seq);
1325 }
1326 return -1;
1327 }
1328 }
1329
1330 if (slicelen > 0) {
1331 /* to avoid recursive calls to this method (via decref), move
1332 old items to the recycle bin here, and get rid of them when
1333 we're done modifying the element */
1334 recycle = PyList_New(slicelen);
1335 if (!recycle) {
1336 if (seq) {
1337 Py_DECREF(seq);
1338 }
1339 return -1;
1340 }
1341 for (cur = start, i = 0; i < slicelen;
1342 cur += step, i++)
1343 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1344 }
1345
1346 if (newlen < slicelen) {
1347 /* delete slice */
1348 for (i = stop; i < self->extra->length; i++)
1349 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1350 } else if (newlen > slicelen) {
1351 /* insert slice */
1352 for (i = self->extra->length-1; i >= stop; i--)
1353 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1354 }
1355
1356 /* replace the slice */
1357 for (cur = start, i = 0; i < newlen;
1358 cur += step, i++) {
1359 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1360 Py_INCREF(element);
1361 self->extra->children[cur] = element;
1362 }
1363
1364 self->extra->length += newlen - slicelen;
1365
1366 if (seq) {
1367 Py_DECREF(seq);
1368 }
1369
1370 /* discard the recycle bin, and everything in it */
1371 Py_XDECREF(recycle);
1372
1373 return 0;
1374 }
1375 else {
1376 PyErr_SetString(PyExc_TypeError,
1377 "element indices must be integers");
1378 return -1;
1379 }
1380}
1381
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001382static PyMethodDef element_methods[] = {
1383
1384 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1385
1386 {"get", (PyCFunction) element_get, METH_VARARGS},
1387 {"set", (PyCFunction) element_set, METH_VARARGS},
1388
1389 {"find", (PyCFunction) element_find, METH_VARARGS},
1390 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1391 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1392
1393 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001394 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1396 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1397
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1399 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1400 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1401
1402 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1404
1405 {"items", (PyCFunction) element_items, METH_VARARGS},
1406 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1407
1408 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1409
1410 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1411 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001413 {NULL, NULL}
1414};
1415
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001417element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001418{
1419 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001420 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001422 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001424
Alexander Belopolskye239d232010-12-08 23:31:48 +00001425 if (name == NULL)
1426 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001428 /* handle common attributes first */
1429 if (strcmp(name, "tag") == 0) {
1430 res = self->tag;
1431 Py_INCREF(res);
1432 return res;
1433 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001435 Py_INCREF(res);
1436 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437 }
1438
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001439 /* methods */
1440 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1441 if (res)
1442 return res;
1443
1444 /* less common attributes */
1445 if (strcmp(name, "tail") == 0) {
1446 PyErr_Clear();
1447 res = element_get_tail(self);
1448 } else if (strcmp(name, "attrib") == 0) {
1449 PyErr_Clear();
1450 if (!self->extra)
1451 element_new_extra(self, NULL);
1452 res = element_get_attrib(self);
1453 }
1454
1455 if (!res)
1456 return NULL;
1457
1458 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459 return res;
1460}
1461
1462static int
1463element_setattr(ElementObject* self, const char* name, PyObject* value)
1464{
1465 if (value == NULL) {
1466 PyErr_SetString(
1467 PyExc_AttributeError,
1468 "can't delete element attributes"
1469 );
1470 return -1;
1471 }
1472
1473 if (strcmp(name, "tag") == 0) {
1474 Py_DECREF(self->tag);
1475 self->tag = value;
1476 Py_INCREF(self->tag);
1477 } else if (strcmp(name, "text") == 0) {
1478 Py_DECREF(JOIN_OBJ(self->text));
1479 self->text = value;
1480 Py_INCREF(self->text);
1481 } else if (strcmp(name, "tail") == 0) {
1482 Py_DECREF(JOIN_OBJ(self->tail));
1483 self->tail = value;
1484 Py_INCREF(self->tail);
1485 } else if (strcmp(name, "attrib") == 0) {
1486 if (!self->extra)
1487 element_new_extra(self, NULL);
1488 Py_DECREF(self->extra->attrib);
1489 self->extra->attrib = value;
1490 Py_INCREF(self->extra->attrib);
1491 } else {
1492 PyErr_SetString(PyExc_AttributeError, name);
1493 return -1;
1494 }
1495
1496 return 0;
1497}
1498
1499static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001500 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001501 0, /* sq_concat */
1502 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001503 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001504 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001505 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001506 0,
1507};
1508
1509static PyMappingMethods element_as_mapping = {
1510 (lenfunc) element_length,
1511 (binaryfunc) element_subscr,
1512 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513};
1514
Neal Norwitz227b5332006-03-22 09:28:35 +00001515static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001516 PyVarObject_HEAD_INIT(NULL, 0)
1517 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518 /* methods */
1519 (destructor)element_dealloc, /* tp_dealloc */
1520 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001521 0, /* tp_getattr */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522 (setattrfunc)element_setattr, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001523 0, /* tp_reserved */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 (reprfunc)element_repr, /* tp_repr */
1525 0, /* tp_as_number */
1526 &element_as_sequence, /* tp_as_sequence */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001527 &element_as_mapping, /* tp_as_mapping */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001528 0, /* tp_hash */
1529 0, /* tp_call */
1530 0, /* tp_str */
1531 (getattrofunc)element_getattro, /* tp_getattro */
1532 0, /* tp_setattro */
1533 0, /* tp_as_buffer */
1534 Py_TPFLAGS_DEFAULT, /* tp_flags */
1535 0, /* tp_doc */
1536 0, /* tp_traverse */
1537 0, /* tp_clear */
1538 0, /* tp_richcompare */
1539 0, /* tp_weaklistoffset */
1540 0, /* tp_iter */
1541 0, /* tp_iternext */
1542 element_methods, /* tp_methods */
1543 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544};
1545
1546/* ==================================================================== */
1547/* the tree builder type */
1548
1549typedef struct {
1550 PyObject_HEAD
1551
1552 PyObject* root; /* root node (first created node) */
1553
1554 ElementObject* this; /* current node */
1555 ElementObject* last; /* most recently created node */
1556
1557 PyObject* data; /* data collector (string or list), or NULL */
1558
1559 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001560 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561
1562 /* element tracing */
1563 PyObject* events; /* list of events, or NULL if not collecting */
1564 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1565 PyObject* end_event_obj;
1566 PyObject* start_ns_event_obj;
1567 PyObject* end_ns_event_obj;
1568
1569} TreeBuilderObject;
1570
Neal Norwitz227b5332006-03-22 09:28:35 +00001571static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572
Christian Heimes90aa7642007-12-19 02:45:37 +00001573#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574
1575/* -------------------------------------------------------------------- */
1576/* constructor and destructor */
1577
1578LOCAL(PyObject*)
1579treebuilder_new(void)
1580{
1581 TreeBuilderObject* self;
1582
1583 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1584 if (self == NULL)
1585 return NULL;
1586
1587 self->root = NULL;
1588
1589 Py_INCREF(Py_None);
1590 self->this = (ElementObject*) Py_None;
1591
1592 Py_INCREF(Py_None);
1593 self->last = (ElementObject*) Py_None;
1594
1595 self->data = NULL;
1596
1597 self->stack = PyList_New(20);
1598 self->index = 0;
1599
1600 self->events = NULL;
1601 self->start_event_obj = self->end_event_obj = NULL;
1602 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1603
1604 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1605
1606 return (PyObject*) self;
1607}
1608
1609static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001610treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611{
1612 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1613 return NULL;
1614
1615 return treebuilder_new();
1616}
1617
1618static void
1619treebuilder_dealloc(TreeBuilderObject* self)
1620{
1621 Py_XDECREF(self->end_ns_event_obj);
1622 Py_XDECREF(self->start_ns_event_obj);
1623 Py_XDECREF(self->end_event_obj);
1624 Py_XDECREF(self->start_event_obj);
1625 Py_XDECREF(self->events);
1626 Py_DECREF(self->stack);
1627 Py_XDECREF(self->data);
1628 Py_DECREF(self->last);
1629 Py_DECREF(self->this);
1630 Py_XDECREF(self->root);
1631
1632 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1633
1634 PyObject_Del(self);
1635}
1636
1637/* -------------------------------------------------------------------- */
1638/* handlers */
1639
1640LOCAL(PyObject*)
1641treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1642 PyObject* standalone)
1643{
1644 Py_RETURN_NONE;
1645}
1646
1647LOCAL(PyObject*)
1648treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1649 PyObject* attrib)
1650{
1651 PyObject* node;
1652 PyObject* this;
1653
1654 if (self->data) {
1655 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001656 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 self->last->text = JOIN_SET(
1658 self->data, PyList_CheckExact(self->data)
1659 );
1660 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001661 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662 self->last->tail = JOIN_SET(
1663 self->data, PyList_CheckExact(self->data)
1664 );
1665 }
1666 self->data = NULL;
1667 }
1668
1669 node = element_new(tag, attrib);
1670 if (!node)
1671 return NULL;
1672
1673 this = (PyObject*) self->this;
1674
1675 if (this != Py_None) {
1676 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001677 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 } else {
1679 if (self->root) {
1680 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001681 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 "multiple elements on top level"
1683 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001684 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685 }
1686 Py_INCREF(node);
1687 self->root = node;
1688 }
1689
1690 if (self->index < PyList_GET_SIZE(self->stack)) {
1691 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001692 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 Py_INCREF(this);
1694 } else {
1695 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001696 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697 }
1698 self->index++;
1699
1700 Py_DECREF(this);
1701 Py_INCREF(node);
1702 self->this = (ElementObject*) node;
1703
1704 Py_DECREF(self->last);
1705 Py_INCREF(node);
1706 self->last = (ElementObject*) node;
1707
1708 if (self->start_event_obj) {
1709 PyObject* res;
1710 PyObject* action = self->start_event_obj;
1711 res = PyTuple_New(2);
1712 if (res) {
1713 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1714 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1715 PyList_Append(self->events, res);
1716 Py_DECREF(res);
1717 } else
1718 PyErr_Clear(); /* FIXME: propagate error */
1719 }
1720
1721 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001722
1723 error:
1724 Py_DECREF(node);
1725 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001726}
1727
1728LOCAL(PyObject*)
1729treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1730{
1731 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001732 if (self->last == (ElementObject*) Py_None) {
1733 /* ignore calls to data before the first call to start */
1734 Py_RETURN_NONE;
1735 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736 /* store the first item as is */
1737 Py_INCREF(data); self->data = data;
1738 } else {
1739 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001740 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1741 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001742 /* expat often generates single character data sections; handle
1743 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001744 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1745 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001747 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748 } else if (PyList_CheckExact(self->data)) {
1749 if (PyList_Append(self->data, data) < 0)
1750 return NULL;
1751 } else {
1752 PyObject* list = PyList_New(2);
1753 if (!list)
1754 return NULL;
1755 PyList_SET_ITEM(list, 0, self->data);
1756 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1757 self->data = list;
1758 }
1759 }
1760
1761 Py_RETURN_NONE;
1762}
1763
1764LOCAL(PyObject*)
1765treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1766{
1767 PyObject* item;
1768
1769 if (self->data) {
1770 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001771 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001772 self->last->text = JOIN_SET(
1773 self->data, PyList_CheckExact(self->data)
1774 );
1775 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001776 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001777 self->last->tail = JOIN_SET(
1778 self->data, PyList_CheckExact(self->data)
1779 );
1780 }
1781 self->data = NULL;
1782 }
1783
1784 if (self->index == 0) {
1785 PyErr_SetString(
1786 PyExc_IndexError,
1787 "pop from empty stack"
1788 );
1789 return NULL;
1790 }
1791
1792 self->index--;
1793
1794 item = PyList_GET_ITEM(self->stack, self->index);
1795 Py_INCREF(item);
1796
1797 Py_DECREF(self->last);
1798
1799 self->last = (ElementObject*) self->this;
1800 self->this = (ElementObject*) item;
1801
1802 if (self->end_event_obj) {
1803 PyObject* res;
1804 PyObject* action = self->end_event_obj;
1805 PyObject* node = (PyObject*) self->last;
1806 res = PyTuple_New(2);
1807 if (res) {
1808 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1809 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1810 PyList_Append(self->events, res);
1811 Py_DECREF(res);
1812 } else
1813 PyErr_Clear(); /* FIXME: propagate error */
1814 }
1815
1816 Py_INCREF(self->last);
1817 return (PyObject*) self->last;
1818}
1819
1820LOCAL(void)
1821treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001822 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001823{
1824 PyObject* res;
1825 PyObject* action;
1826 PyObject* parcel;
1827
1828 if (!self->events)
1829 return;
1830
1831 if (start) {
1832 if (!self->start_ns_event_obj)
1833 return;
1834 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001835 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001836 if (!parcel)
1837 return;
1838 Py_INCREF(action);
1839 } else {
1840 if (!self->end_ns_event_obj)
1841 return;
1842 action = self->end_ns_event_obj;
1843 Py_INCREF(action);
1844 parcel = Py_None;
1845 Py_INCREF(parcel);
1846 }
1847
1848 res = PyTuple_New(2);
1849
1850 if (res) {
1851 PyTuple_SET_ITEM(res, 0, action);
1852 PyTuple_SET_ITEM(res, 1, parcel);
1853 PyList_Append(self->events, res);
1854 Py_DECREF(res);
1855 } else
1856 PyErr_Clear(); /* FIXME: propagate error */
1857}
1858
1859/* -------------------------------------------------------------------- */
1860/* methods (in alphabetical order) */
1861
1862static PyObject*
1863treebuilder_data(TreeBuilderObject* self, PyObject* args)
1864{
1865 PyObject* data;
1866 if (!PyArg_ParseTuple(args, "O:data", &data))
1867 return NULL;
1868
1869 return treebuilder_handle_data(self, data);
1870}
1871
1872static PyObject*
1873treebuilder_end(TreeBuilderObject* self, PyObject* args)
1874{
1875 PyObject* tag;
1876 if (!PyArg_ParseTuple(args, "O:end", &tag))
1877 return NULL;
1878
1879 return treebuilder_handle_end(self, tag);
1880}
1881
1882LOCAL(PyObject*)
1883treebuilder_done(TreeBuilderObject* self)
1884{
1885 PyObject* res;
1886
1887 /* FIXME: check stack size? */
1888
1889 if (self->root)
1890 res = self->root;
1891 else
1892 res = Py_None;
1893
1894 Py_INCREF(res);
1895 return res;
1896}
1897
1898static PyObject*
1899treebuilder_close(TreeBuilderObject* self, PyObject* args)
1900{
1901 if (!PyArg_ParseTuple(args, ":close"))
1902 return NULL;
1903
1904 return treebuilder_done(self);
1905}
1906
1907static PyObject*
1908treebuilder_start(TreeBuilderObject* self, PyObject* args)
1909{
1910 PyObject* tag;
1911 PyObject* attrib = Py_None;
1912 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1913 return NULL;
1914
1915 return treebuilder_handle_start(self, tag, attrib);
1916}
1917
1918static PyObject*
1919treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1920{
1921 PyObject* encoding;
1922 PyObject* standalone;
1923 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1924 return NULL;
1925
1926 return treebuilder_handle_xml(self, encoding, standalone);
1927}
1928
1929static PyMethodDef treebuilder_methods[] = {
1930 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1931 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1932 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1933 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1934 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1935 {NULL, NULL}
1936};
1937
Neal Norwitz227b5332006-03-22 09:28:35 +00001938static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001939 PyVarObject_HEAD_INIT(NULL, 0)
1940 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941 /* methods */
1942 (destructor)treebuilder_dealloc, /* tp_dealloc */
1943 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001944 0, /* tp_getattr */
1945 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001946 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001947 0, /* tp_repr */
1948 0, /* tp_as_number */
1949 0, /* tp_as_sequence */
1950 0, /* tp_as_mapping */
1951 0, /* tp_hash */
1952 0, /* tp_call */
1953 0, /* tp_str */
1954 0, /* tp_getattro */
1955 0, /* tp_setattro */
1956 0, /* tp_as_buffer */
1957 Py_TPFLAGS_DEFAULT, /* tp_flags */
1958 0, /* tp_doc */
1959 0, /* tp_traverse */
1960 0, /* tp_clear */
1961 0, /* tp_richcompare */
1962 0, /* tp_weaklistoffset */
1963 0, /* tp_iter */
1964 0, /* tp_iternext */
1965 treebuilder_methods, /* tp_methods */
1966 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967};
1968
1969/* ==================================================================== */
1970/* the expat interface */
1971
1972#if defined(USE_EXPAT)
1973
1974#include "expat.h"
1975
1976#if defined(USE_PYEXPAT_CAPI)
1977#include "pyexpat.h"
1978static struct PyExpat_CAPI* expat_capi;
1979#define EXPAT(func) (expat_capi->func)
1980#else
1981#define EXPAT(func) (XML_##func)
1982#endif
1983
1984typedef struct {
1985 PyObject_HEAD
1986
1987 XML_Parser parser;
1988
1989 PyObject* target;
1990 PyObject* entity;
1991
1992 PyObject* names;
1993
1994 PyObject* handle_xml;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001995
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001996 PyObject* handle_start;
1997 PyObject* handle_data;
1998 PyObject* handle_end;
1999
2000 PyObject* handle_comment;
2001 PyObject* handle_pi;
2002
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002003 PyObject* handle_close;
2004
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002005} XMLParserObject;
2006
Neal Norwitz227b5332006-03-22 09:28:35 +00002007static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008
2009/* helpers */
2010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002011LOCAL(PyObject*)
2012makeuniversal(XMLParserObject* self, const char* string)
2013{
2014 /* convert a UTF-8 tag/attribute name from the expat parser
2015 to a universal name string */
2016
2017 int size = strlen(string);
2018 PyObject* key;
2019 PyObject* value;
2020
2021 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002022 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002023 if (!key)
2024 return NULL;
2025
2026 value = PyDict_GetItem(self->names, key);
2027
2028 if (value) {
2029 Py_INCREF(value);
2030 } else {
2031 /* new name. convert to universal name, and decode as
2032 necessary */
2033
2034 PyObject* tag;
2035 char* p;
2036 int i;
2037
2038 /* look for namespace separator */
2039 for (i = 0; i < size; i++)
2040 if (string[i] == '}')
2041 break;
2042 if (i != size) {
2043 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002044 tag = PyBytes_FromStringAndSize(NULL, size+1);
2045 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002046 p[0] = '{';
2047 memcpy(p+1, string, size);
2048 size++;
2049 } else {
2050 /* plain name; use key as tag */
2051 Py_INCREF(key);
2052 tag = key;
2053 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002054
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002055 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002056 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002057 value = PyUnicode_DecodeUTF8(p, size, "strict");
2058 Py_DECREF(tag);
2059 if (!value) {
2060 Py_DECREF(key);
2061 return NULL;
2062 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002063
2064 /* add to names dictionary */
2065 if (PyDict_SetItem(self->names, key, value) < 0) {
2066 Py_DECREF(key);
2067 Py_DECREF(value);
2068 return NULL;
2069 }
2070 }
2071
2072 Py_DECREF(key);
2073 return value;
2074}
2075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002076static void
2077expat_set_error(const char* message, int line, int column)
2078{
Victor Stinner499dfcf2011-03-21 13:26:24 +01002079 PyObject *errmsg, *error, *position;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002080
Victor Stinner499dfcf2011-03-21 13:26:24 +01002081 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
2082 message, line, column);
2083 if (errmsg == NULL)
2084 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002085
Victor Stinner499dfcf2011-03-21 13:26:24 +01002086 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2087 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002088 if (!error)
2089 return;
2090
2091 /* add position attribute */
2092 position = Py_BuildValue("(ii)", line, column);
2093 if (!position) {
2094 Py_DECREF(error);
2095 return;
2096 }
2097 if (PyObject_SetAttrString(error, "position", position) == -1) {
2098 Py_DECREF(error);
2099 Py_DECREF(position);
2100 return;
2101 }
2102 Py_DECREF(position);
2103
2104 PyErr_SetObject(elementtree_parseerror_obj, error);
2105 Py_DECREF(error);
2106}
2107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002108/* -------------------------------------------------------------------- */
2109/* handlers */
2110
2111static void
2112expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2113 int data_len)
2114{
2115 PyObject* key;
2116 PyObject* value;
2117 PyObject* res;
2118
2119 if (data_len < 2 || data_in[0] != '&')
2120 return;
2121
Neal Norwitz0269b912007-08-08 06:56:02 +00002122 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002123 if (!key)
2124 return;
2125
2126 value = PyDict_GetItem(self->entity, key);
2127
2128 if (value) {
2129 if (TreeBuilder_CheckExact(self->target))
2130 res = treebuilder_handle_data(
2131 (TreeBuilderObject*) self->target, value
2132 );
2133 else if (self->handle_data)
2134 res = PyObject_CallFunction(self->handle_data, "O", value);
2135 else
2136 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002137 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002138 } else if (!PyErr_Occurred()) {
2139 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002140 char message[128] = "undefined entity ";
2141 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002142 expat_set_error(
2143 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002144 EXPAT(GetErrorLineNumber)(self->parser),
2145 EXPAT(GetErrorColumnNumber)(self->parser)
2146 );
2147 }
2148
2149 Py_DECREF(key);
2150}
2151
2152static void
2153expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2154 const XML_Char **attrib_in)
2155{
2156 PyObject* res;
2157 PyObject* tag;
2158 PyObject* attrib;
2159 int ok;
2160
2161 /* tag name */
2162 tag = makeuniversal(self, tag_in);
2163 if (!tag)
2164 return; /* parser will look for errors */
2165
2166 /* attributes */
2167 if (attrib_in[0]) {
2168 attrib = PyDict_New();
2169 if (!attrib)
2170 return;
2171 while (attrib_in[0] && attrib_in[1]) {
2172 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002173 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002174 if (!key || !value) {
2175 Py_XDECREF(value);
2176 Py_XDECREF(key);
2177 Py_DECREF(attrib);
2178 return;
2179 }
2180 ok = PyDict_SetItem(attrib, key, value);
2181 Py_DECREF(value);
2182 Py_DECREF(key);
2183 if (ok < 0) {
2184 Py_DECREF(attrib);
2185 return;
2186 }
2187 attrib_in += 2;
2188 }
2189 } else {
2190 Py_INCREF(Py_None);
2191 attrib = Py_None;
2192 }
2193
2194 if (TreeBuilder_CheckExact(self->target))
2195 /* shortcut */
2196 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2197 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002198 else if (self->handle_start) {
2199 if (attrib == Py_None) {
2200 Py_DECREF(attrib);
2201 attrib = PyDict_New();
2202 if (!attrib)
2203 return;
2204 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002205 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002206 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002207 res = NULL;
2208
2209 Py_DECREF(tag);
2210 Py_DECREF(attrib);
2211
2212 Py_XDECREF(res);
2213}
2214
2215static void
2216expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2217 int data_len)
2218{
2219 PyObject* data;
2220 PyObject* res;
2221
Neal Norwitz0269b912007-08-08 06:56:02 +00002222 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002223 if (!data)
2224 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225
2226 if (TreeBuilder_CheckExact(self->target))
2227 /* shortcut */
2228 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2229 else if (self->handle_data)
2230 res = PyObject_CallFunction(self->handle_data, "O", data);
2231 else
2232 res = NULL;
2233
2234 Py_DECREF(data);
2235
2236 Py_XDECREF(res);
2237}
2238
2239static void
2240expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2241{
2242 PyObject* tag;
2243 PyObject* res = NULL;
2244
2245 if (TreeBuilder_CheckExact(self->target))
2246 /* shortcut */
2247 /* the standard tree builder doesn't look at the end tag */
2248 res = treebuilder_handle_end(
2249 (TreeBuilderObject*) self->target, Py_None
2250 );
2251 else if (self->handle_end) {
2252 tag = makeuniversal(self, tag_in);
2253 if (tag) {
2254 res = PyObject_CallFunction(self->handle_end, "O", tag);
2255 Py_DECREF(tag);
2256 }
2257 }
2258
2259 Py_XDECREF(res);
2260}
2261
2262static void
2263expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2264 const XML_Char *uri)
2265{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002266 PyObject* sprefix = NULL;
2267 PyObject* suri = NULL;
2268
2269 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2270 if (!suri)
2271 return;
2272
2273 if (prefix)
2274 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2275 else
2276 sprefix = PyUnicode_FromString("");
2277 if (!sprefix) {
2278 Py_DECREF(suri);
2279 return;
2280 }
2281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002283 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002285
2286 Py_DECREF(sprefix);
2287 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288}
2289
2290static void
2291expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2292{
2293 treebuilder_handle_namespace(
2294 (TreeBuilderObject*) self->target, 0, NULL, NULL
2295 );
2296}
2297
2298static void
2299expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2300{
2301 PyObject* comment;
2302 PyObject* res;
2303
2304 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002305 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002306 if (comment) {
2307 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2308 Py_XDECREF(res);
2309 Py_DECREF(comment);
2310 }
2311 }
2312}
2313
2314static void
2315expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2316 const XML_Char* data_in)
2317{
2318 PyObject* target;
2319 PyObject* data;
2320 PyObject* res;
2321
2322 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002323 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2324 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325 if (target && data) {
2326 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2327 Py_XDECREF(res);
2328 Py_DECREF(data);
2329 Py_DECREF(target);
2330 } else {
2331 Py_XDECREF(data);
2332 Py_XDECREF(target);
2333 }
2334 }
2335}
2336
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337static int
2338expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2339 XML_Encoding *info)
2340{
2341 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342 unsigned char s[256];
2343 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002344 void *data;
2345 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346
2347 memset(info, 0, sizeof(XML_Encoding));
2348
2349 for (i = 0; i < 256; i++)
2350 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002351
Fredrik Lundhc3389992005-12-25 11:40:19 +00002352 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353 if (!u)
2354 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002355 if (PyUnicode_READY(u))
2356 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002357
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002358 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002359 Py_DECREF(u);
2360 return XML_STATUS_ERROR;
2361 }
2362
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002363 kind = PyUnicode_KIND(u);
2364 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002366 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2367 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2368 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002370 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371 }
2372
2373 Py_DECREF(u);
2374
2375 return XML_STATUS_OK;
2376}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377
2378/* -------------------------------------------------------------------- */
2379/* constructor and destructor */
2380
2381static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002382xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383{
2384 XMLParserObject* self;
2385 /* FIXME: does this need to be static? */
2386 static XML_Memory_Handling_Suite memory_handler;
2387
2388 PyObject* target = NULL;
2389 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002390 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2392 &target, &encoding))
2393 return NULL;
2394
2395#if defined(USE_PYEXPAT_CAPI)
2396 if (!expat_capi) {
2397 PyErr_SetString(
2398 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2399 );
2400 return NULL;
2401 }
2402#endif
2403
2404 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2405 if (self == NULL)
2406 return NULL;
2407
2408 self->entity = PyDict_New();
2409 if (!self->entity) {
2410 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002411 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002412 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002413
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 self->names = PyDict_New();
2415 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002416 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002418 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419 }
2420
2421 memory_handler.malloc_fcn = PyObject_Malloc;
2422 memory_handler.realloc_fcn = PyObject_Realloc;
2423 memory_handler.free_fcn = PyObject_Free;
2424
2425 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2426 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002427 PyObject_Del(self->names);
2428 PyObject_Del(self->entity);
2429 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002431 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 }
2433
2434 /* setup target handlers */
2435 if (!target) {
2436 target = treebuilder_new();
2437 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002438 EXPAT(ParserFree)(self->parser);
2439 PyObject_Del(self->names);
2440 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002442 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443 }
2444 } else
2445 Py_INCREF(target);
2446 self->target = target;
2447
2448 self->handle_xml = PyObject_GetAttrString(target, "xml");
2449 self->handle_start = PyObject_GetAttrString(target, "start");
2450 self->handle_data = PyObject_GetAttrString(target, "data");
2451 self->handle_end = PyObject_GetAttrString(target, "end");
2452 self->handle_comment = PyObject_GetAttrString(target, "comment");
2453 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002454 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455
2456 PyErr_Clear();
2457
2458 /* configure parser */
2459 EXPAT(SetUserData)(self->parser, self);
2460 EXPAT(SetElementHandler)(
2461 self->parser,
2462 (XML_StartElementHandler) expat_start_handler,
2463 (XML_EndElementHandler) expat_end_handler
2464 );
2465 EXPAT(SetDefaultHandlerExpand)(
2466 self->parser,
2467 (XML_DefaultHandler) expat_default_handler
2468 );
2469 EXPAT(SetCharacterDataHandler)(
2470 self->parser,
2471 (XML_CharacterDataHandler) expat_data_handler
2472 );
2473 if (self->handle_comment)
2474 EXPAT(SetCommentHandler)(
2475 self->parser,
2476 (XML_CommentHandler) expat_comment_handler
2477 );
2478 if (self->handle_pi)
2479 EXPAT(SetProcessingInstructionHandler)(
2480 self->parser,
2481 (XML_ProcessingInstructionHandler) expat_pi_handler
2482 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483 EXPAT(SetUnknownEncodingHandler)(
2484 self->parser,
2485 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2486 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487
2488 ALLOC(sizeof(XMLParserObject), "create expatparser");
2489
2490 return (PyObject*) self;
2491}
2492
2493static void
2494xmlparser_dealloc(XMLParserObject* self)
2495{
2496 EXPAT(ParserFree)(self->parser);
2497
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002498 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 Py_XDECREF(self->handle_pi);
2500 Py_XDECREF(self->handle_comment);
2501 Py_XDECREF(self->handle_end);
2502 Py_XDECREF(self->handle_data);
2503 Py_XDECREF(self->handle_start);
2504 Py_XDECREF(self->handle_xml);
2505
2506 Py_DECREF(self->target);
2507 Py_DECREF(self->entity);
2508 Py_DECREF(self->names);
2509
2510 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2511
2512 PyObject_Del(self);
2513}
2514
2515/* -------------------------------------------------------------------- */
2516/* methods (in alphabetical order) */
2517
2518LOCAL(PyObject*)
2519expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2520{
2521 int ok;
2522
2523 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2524
2525 if (PyErr_Occurred())
2526 return NULL;
2527
2528 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002529 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2531 EXPAT(GetErrorLineNumber)(self->parser),
2532 EXPAT(GetErrorColumnNumber)(self->parser)
2533 );
2534 return NULL;
2535 }
2536
2537 Py_RETURN_NONE;
2538}
2539
2540static PyObject*
2541xmlparser_close(XMLParserObject* self, PyObject* args)
2542{
2543 /* end feeding data to parser */
2544
2545 PyObject* res;
2546 if (!PyArg_ParseTuple(args, ":close"))
2547 return NULL;
2548
2549 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002550 if (!res)
2551 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002553 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 Py_DECREF(res);
2555 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002556 } if (self->handle_close) {
2557 Py_DECREF(res);
2558 return PyObject_CallFunction(self->handle_close, "");
2559 } else
2560 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561}
2562
2563static PyObject*
2564xmlparser_feed(XMLParserObject* self, PyObject* args)
2565{
2566 /* feed data to parser */
2567
2568 char* data;
2569 int data_len;
2570 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2571 return NULL;
2572
2573 return expat_parse(self, data, data_len, 0);
2574}
2575
2576static PyObject*
2577xmlparser_parse(XMLParserObject* self, PyObject* args)
2578{
2579 /* (internal) parse until end of input stream */
2580
2581 PyObject* reader;
2582 PyObject* buffer;
2583 PyObject* res;
2584
2585 PyObject* fileobj;
2586 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2587 return NULL;
2588
2589 reader = PyObject_GetAttrString(fileobj, "read");
2590 if (!reader)
2591 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002592
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593 /* read from open file object */
2594 for (;;) {
2595
2596 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2597
2598 if (!buffer) {
2599 /* read failed (e.g. due to KeyboardInterrupt) */
2600 Py_DECREF(reader);
2601 return NULL;
2602 }
2603
Christian Heimes72b710a2008-05-26 13:28:38 +00002604 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 Py_DECREF(buffer);
2606 break;
2607 }
2608
2609 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002610 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 );
2612
2613 Py_DECREF(buffer);
2614
2615 if (!res) {
2616 Py_DECREF(reader);
2617 return NULL;
2618 }
2619 Py_DECREF(res);
2620
2621 }
2622
2623 Py_DECREF(reader);
2624
2625 res = expat_parse(self, "", 0, 1);
2626
2627 if (res && TreeBuilder_CheckExact(self->target)) {
2628 Py_DECREF(res);
2629 return treebuilder_done((TreeBuilderObject*) self->target);
2630 }
2631
2632 return res;
2633}
2634
2635static PyObject*
2636xmlparser_setevents(XMLParserObject* self, PyObject* args)
2637{
2638 /* activate element event reporting */
2639
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002640 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 TreeBuilderObject* target;
2642
2643 PyObject* events; /* event collector */
2644 PyObject* event_set = Py_None;
2645 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2646 &event_set))
2647 return NULL;
2648
2649 if (!TreeBuilder_CheckExact(self->target)) {
2650 PyErr_SetString(
2651 PyExc_TypeError,
2652 "event handling only supported for cElementTree.Treebuilder "
2653 "targets"
2654 );
2655 return NULL;
2656 }
2657
2658 target = (TreeBuilderObject*) self->target;
2659
2660 Py_INCREF(events);
2661 Py_XDECREF(target->events);
2662 target->events = events;
2663
2664 /* clear out existing events */
2665 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2666 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2667 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2668 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2669
2670 if (event_set == Py_None) {
2671 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002672 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 Py_RETURN_NONE;
2674 }
2675
2676 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2677 goto error;
2678
2679 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2680 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2681 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002682 if (PyUnicode_Check(item)) {
2683 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002684 if (event == NULL)
2685 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002686 } else if (PyBytes_Check(item))
2687 event = PyBytes_AS_STRING(item);
2688 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002690 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691 if (strcmp(event, "start") == 0) {
2692 Py_INCREF(item);
2693 target->start_event_obj = item;
2694 } else if (strcmp(event, "end") == 0) {
2695 Py_INCREF(item);
2696 Py_XDECREF(target->end_event_obj);
2697 target->end_event_obj = item;
2698 } else if (strcmp(event, "start-ns") == 0) {
2699 Py_INCREF(item);
2700 Py_XDECREF(target->start_ns_event_obj);
2701 target->start_ns_event_obj = item;
2702 EXPAT(SetNamespaceDeclHandler)(
2703 self->parser,
2704 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2705 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2706 );
2707 } else if (strcmp(event, "end-ns") == 0) {
2708 Py_INCREF(item);
2709 Py_XDECREF(target->end_ns_event_obj);
2710 target->end_ns_event_obj = item;
2711 EXPAT(SetNamespaceDeclHandler)(
2712 self->parser,
2713 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2714 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2715 );
2716 } else {
2717 PyErr_Format(
2718 PyExc_ValueError,
2719 "unknown event '%s'", event
2720 );
2721 return NULL;
2722 }
2723 }
2724
2725 Py_RETURN_NONE;
2726
2727 error:
2728 PyErr_SetString(
2729 PyExc_TypeError,
2730 "invalid event tuple"
2731 );
2732 return NULL;
2733}
2734
2735static PyMethodDef xmlparser_methods[] = {
2736 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2737 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2738 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2739 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2740 {NULL, NULL}
2741};
2742
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002743static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002744xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002746 if (PyUnicode_Check(nameobj)) {
2747 PyObject* res;
2748 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2749 res = self->entity;
2750 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2751 res = self->target;
2752 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2753 return PyUnicode_FromFormat(
2754 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002756 }
2757 else
2758 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
Alexander Belopolskye239d232010-12-08 23:31:48 +00002760 Py_INCREF(res);
2761 return res;
2762 }
2763 generic:
2764 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765}
2766
Neal Norwitz227b5332006-03-22 09:28:35 +00002767static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002768 PyVarObject_HEAD_INIT(NULL, 0)
2769 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 /* methods */
2771 (destructor)xmlparser_dealloc, /* tp_dealloc */
2772 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002773 0, /* tp_getattr */
2774 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002775 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002776 0, /* tp_repr */
2777 0, /* tp_as_number */
2778 0, /* tp_as_sequence */
2779 0, /* tp_as_mapping */
2780 0, /* tp_hash */
2781 0, /* tp_call */
2782 0, /* tp_str */
2783 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2784 0, /* tp_setattro */
2785 0, /* tp_as_buffer */
2786 Py_TPFLAGS_DEFAULT, /* tp_flags */
2787 0, /* tp_doc */
2788 0, /* tp_traverse */
2789 0, /* tp_clear */
2790 0, /* tp_richcompare */
2791 0, /* tp_weaklistoffset */
2792 0, /* tp_iter */
2793 0, /* tp_iternext */
2794 xmlparser_methods, /* tp_methods */
2795 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796};
2797
2798#endif
2799
2800/* ==================================================================== */
2801/* python module interface */
2802
2803static PyMethodDef _functions[] = {
2804 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2805 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2806 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2807#if defined(USE_EXPAT)
2808 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809#endif
2810 {NULL, NULL}
2811};
2812
Martin v. Löwis1a214512008-06-11 05:26:20 +00002813
2814static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002815 PyModuleDef_HEAD_INIT,
2816 "_elementtree",
2817 NULL,
2818 -1,
2819 _functions,
2820 NULL,
2821 NULL,
2822 NULL,
2823 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002824};
2825
Neal Norwitzf6657e62006-12-28 04:47:50 +00002826PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002827PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828{
2829 PyObject* m;
2830 PyObject* g;
2831 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002832
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002833 /* Initialize object types */
2834 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002835 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002836 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002837 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002839 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002840 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841#endif
2842
Martin v. Löwis1a214512008-06-11 05:26:20 +00002843 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002844 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002845 return NULL;
2846
2847 /* The code below requires that the module gets already added
2848 to sys.modules. */
2849 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002850 _elementtreemodule.m_name,
2851 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852
2853 /* python glue code */
2854
2855 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002856 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002857 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002858
2859 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2860
2861 bootstrap = (
2862
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01002863 "from copy import deepcopy\n"
2864 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867 " if tag == '*':\n"
2868 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869 " if tag is None or node.tag == tag:\n"
2870 " yield node\n"
2871 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002874
2875 "def itertext(node):\n" /* helper */
2876 " if node.text:\n"
2877 " yield node.text\n"
2878 " for e in node:\n"
2879 " for s in e.itertext():\n"
2880 " yield s\n"
2881 " if e.tail:\n"
2882 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884 );
2885
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002886 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
2887 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888
2889 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
2892 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893
2894#if defined(USE_PYEXPAT_CAPI)
2895 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
2897 if (expat_capi) {
2898 /* check that it's usable */
2899 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
2900 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
2901 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
2902 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
2903 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
2904 expat_capi = NULL;
2905 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002908 elementtree_parseerror_obj = PyErr_NewException(
2909 "cElementTree.ParseError", PyExc_SyntaxError, NULL
2910 );
2911 Py_INCREF(elementtree_parseerror_obj);
2912 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
2913
2914 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915}