blob: b9abcac8d0c8706cd4f9652d6329ec8f4feb7e1e [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
97/* compatibility macros */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000098#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
Martin v. Löwis18e16552006-02-15 17:27:45 +0000103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000105#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000106#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
364 * "children", which needs at least 4 bytes. Although it's a
365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
621
622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
664
665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
678
679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
692
693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
696 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Neal Norwitzc7074382006-06-12 02:06:17 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
725#if defined(Py_USING_UNICODE)
726 if (PyUnicode_Check(tag)) {
727 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
728 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
729 if (p[i] == '{')
730 check = 0;
731 else if (p[i] == '}')
732 check = 1;
733 else if (check && PATHCHAR(p[i]))
734 return 1;
735 }
736 return 0;
737 }
738#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000739 if (PyString_Check(tag)) {
740 char *p = PyString_AS_STRING(tag);
741 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 if (p[i] == '{')
743 check = 0;
744 else if (p[i] == '}')
745 check = 1;
746 else if (check && PATHCHAR(p[i]))
747 return 1;
748 }
749 return 0;
750 }
751
752 return 1; /* unknown type; might be path expression */
753}
754
755static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000756element_extend(ElementObject* self, PyObject* args)
757{
758 PyObject* seq;
759 Py_ssize_t i, seqlen = 0;
760
761 PyObject* seq_in;
762 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
763 return NULL;
764
765 seq = PySequence_Fast(seq_in, "");
766 if (!seq) {
767 PyErr_Format(
768 PyExc_TypeError,
769 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
770 );
771 return NULL;
772 }
773
774 seqlen = PySequence_Size(seq);
775 for (i = 0; i < seqlen; i++) {
776 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
777 if (element_add_subelement(self, element) < 0) {
778 Py_DECREF(seq);
779 return NULL;
780 }
781 }
782
783 Py_DECREF(seq);
784
785 Py_RETURN_NONE;
786}
787
788static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789element_find(ElementObject* self, PyObject* args)
790{
791 int i;
792
793 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000794 PyObject* namespaces = Py_None;
795 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 return NULL;
797
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000798 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000800 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 );
802
803 if (!self->extra)
804 Py_RETURN_NONE;
805
806 for (i = 0; i < self->extra->length; i++) {
807 PyObject* item = self->extra->children[i];
808 if (Element_CheckExact(item) &&
809 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
810 Py_INCREF(item);
811 return item;
812 }
813 }
814
815 Py_RETURN_NONE;
816}
817
818static PyObject*
819element_findtext(ElementObject* self, PyObject* args)
820{
821 int i;
822
823 PyObject* tag;
824 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000825 PyObject* namespaces = Py_None;
826 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827 return NULL;
828
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000829 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000831 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832 );
833
834 if (!self->extra) {
835 Py_INCREF(default_value);
836 return default_value;
837 }
838
839 for (i = 0; i < self->extra->length; i++) {
840 ElementObject* item = (ElementObject*) self->extra->children[i];
841 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
842 PyObject* text = element_get_text(item);
843 if (text == Py_None)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000844 return PyString_FromString("");
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000845 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 return text;
847 }
848 }
849
850 Py_INCREF(default_value);
851 return default_value;
852}
853
854static PyObject*
855element_findall(ElementObject* self, PyObject* args)
856{
857 int i;
858 PyObject* out;
859
860 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000861 PyObject* namespaces = Py_None;
862 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863 return NULL;
864
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000865 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000866 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000867 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 );
869
870 out = PyList_New(0);
871 if (!out)
872 return NULL;
873
874 if (!self->extra)
875 return out;
876
877 for (i = 0; i < self->extra->length; i++) {
878 PyObject* item = self->extra->children[i];
879 if (Element_CheckExact(item) &&
880 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
881 if (PyList_Append(out, item) < 0) {
882 Py_DECREF(out);
883 return NULL;
884 }
885 }
886 }
887
888 return out;
889}
890
891static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000892element_iterfind(ElementObject* self, PyObject* args)
893{
894 PyObject* tag;
895 PyObject* namespaces = Py_None;
896 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
897 return NULL;
898
899 return PyObject_CallMethod(
900 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
901 );
902}
903
904static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905element_get(ElementObject* self, PyObject* args)
906{
907 PyObject* value;
908
909 PyObject* key;
910 PyObject* default_value = Py_None;
911 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
912 return NULL;
913
914 if (!self->extra || self->extra->attrib == Py_None)
915 value = default_value;
916 else {
917 value = PyDict_GetItem(self->extra->attrib, key);
918 if (!value)
919 value = default_value;
920 }
921
922 Py_INCREF(value);
923 return value;
924}
925
926static PyObject*
927element_getchildren(ElementObject* self, PyObject* args)
928{
929 int i;
930 PyObject* list;
931
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000932 /* FIXME: report as deprecated? */
933
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934 if (!PyArg_ParseTuple(args, ":getchildren"))
935 return NULL;
936
937 if (!self->extra)
938 return PyList_New(0);
939
940 list = PyList_New(self->extra->length);
941 if (!list)
942 return NULL;
943
944 for (i = 0; i < self->extra->length; i++) {
945 PyObject* item = self->extra->children[i];
946 Py_INCREF(item);
947 PyList_SET_ITEM(list, i, item);
948 }
949
950 return list;
951}
952
953static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000954element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000955{
956 PyObject* result;
957
958 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000959 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 return NULL;
961
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000962 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 PyErr_SetString(
964 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000965 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000966 );
967 return NULL;
968 }
969
970 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000971 if (!args)
972 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000973
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
975 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
976
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000977 result = PyObject_CallObject(elementtree_iter_obj, args);
978
979 Py_DECREF(args);
980
981 return result;
982}
983
984
985static PyObject*
986element_itertext(ElementObject* self, PyObject* args)
987{
988 PyObject* result;
989
990 if (!PyArg_ParseTuple(args, ":itertext"))
991 return NULL;
992
993 if (!elementtree_itertext_obj) {
994 PyErr_SetString(
995 PyExc_RuntimeError,
996 "itertext helper not found"
997 );
998 return NULL;
999 }
1000
1001 args = PyTuple_New(1);
1002 if (!args)
1003 return NULL;
1004
1005 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1006
1007 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008
1009 Py_DECREF(args);
1010
1011 return result;
1012}
1013
1014static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001015element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001017 ElementObject* self = (ElementObject*) self_;
1018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001019 if (!self->extra || index < 0 || index >= self->extra->length) {
1020 PyErr_SetString(
1021 PyExc_IndexError,
1022 "child index out of range"
1023 );
1024 return NULL;
1025 }
1026
1027 Py_INCREF(self->extra->children[index]);
1028 return self->extra->children[index];
1029}
1030
1031static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001032element_insert(ElementObject* self, PyObject* args)
1033{
1034 int i;
1035
1036 int index;
1037 PyObject* element;
1038 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1039 &Element_Type, &element))
1040 return NULL;
1041
1042 if (!self->extra)
1043 element_new_extra(self, NULL);
1044
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001045 if (index < 0) {
1046 index += self->extra->length;
1047 if (index < 0)
1048 index = 0;
1049 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001050 if (index > self->extra->length)
1051 index = self->extra->length;
1052
1053 if (element_resize(self, 1) < 0)
1054 return NULL;
1055
1056 for (i = self->extra->length; i > index; i--)
1057 self->extra->children[i] = self->extra->children[i-1];
1058
1059 Py_INCREF(element);
1060 self->extra->children[index] = element;
1061
1062 self->extra->length++;
1063
1064 Py_RETURN_NONE;
1065}
1066
1067static PyObject*
1068element_items(ElementObject* self, PyObject* args)
1069{
1070 if (!PyArg_ParseTuple(args, ":items"))
1071 return NULL;
1072
1073 if (!self->extra || self->extra->attrib == Py_None)
1074 return PyList_New(0);
1075
1076 return PyDict_Items(self->extra->attrib);
1077}
1078
1079static PyObject*
1080element_keys(ElementObject* self, PyObject* args)
1081{
1082 if (!PyArg_ParseTuple(args, ":keys"))
1083 return NULL;
1084
1085 if (!self->extra || self->extra->attrib == Py_None)
1086 return PyList_New(0);
1087
1088 return PyDict_Keys(self->extra->attrib);
1089}
1090
Martin v. Löwis18e16552006-02-15 17:27:45 +00001091static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092element_length(ElementObject* self)
1093{
1094 if (!self->extra)
1095 return 0;
1096
1097 return self->extra->length;
1098}
1099
1100static PyObject*
1101element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1102{
1103 PyObject* elem;
1104
1105 PyObject* tag;
1106 PyObject* attrib;
1107 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1108 return NULL;
1109
1110 attrib = PyDict_Copy(attrib);
1111 if (!attrib)
1112 return NULL;
1113
1114 elem = element_new(tag, attrib);
1115
1116 Py_DECREF(attrib);
1117
1118 return elem;
1119}
1120
1121static PyObject*
1122element_reduce(ElementObject* self, PyObject* args)
1123{
1124 if (!PyArg_ParseTuple(args, ":__reduce__"))
1125 return NULL;
1126
1127 /* Hack alert: This method is used to work around a __copy__
1128 problem on certain 2.3 and 2.4 versions. To save time and
1129 simplify the code, we create the copy in here, and use a dummy
1130 copyelement helper to trick the copy module into doing the
1131 right thing. */
1132
1133 if (!elementtree_copyelement_obj) {
1134 PyErr_SetString(
1135 PyExc_RuntimeError,
1136 "copyelement helper not found"
1137 );
1138 return NULL;
1139 }
1140
1141 return Py_BuildValue(
1142 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1143 );
1144}
1145
1146static PyObject*
1147element_remove(ElementObject* self, PyObject* args)
1148{
1149 int i;
1150
1151 PyObject* element;
1152 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1153 return NULL;
1154
1155 if (!self->extra) {
1156 /* element has no children, so raise exception */
1157 PyErr_SetString(
1158 PyExc_ValueError,
1159 "list.remove(x): x not in list"
1160 );
1161 return NULL;
1162 }
1163
1164 for (i = 0; i < self->extra->length; i++) {
1165 if (self->extra->children[i] == element)
1166 break;
1167 if (PyObject_Compare(self->extra->children[i], element) == 0)
1168 break;
1169 }
1170
1171 if (i == self->extra->length) {
1172 /* element is not in children, so raise exception */
1173 PyErr_SetString(
1174 PyExc_ValueError,
1175 "list.remove(x): x not in list"
1176 );
1177 return NULL;
1178 }
1179
1180 Py_DECREF(self->extra->children[i]);
1181
1182 self->extra->length--;
1183
1184 for (; i < self->extra->length; i++)
1185 self->extra->children[i] = self->extra->children[i+1];
1186
1187 Py_RETURN_NONE;
1188}
1189
1190static PyObject*
1191element_repr(ElementObject* self)
1192{
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001193 PyObject *repr, *tag;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001195 tag = PyObject_Repr(self->tag);
1196 if (!tag)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198
Florent Xiclunae2e81e82010-03-11 15:55:11 +00001199 repr = PyString_FromFormat("<Element %s at %p>",
1200 PyString_AS_STRING(tag), self);
1201
1202 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203
1204 return repr;
1205}
1206
1207static PyObject*
1208element_set(ElementObject* self, PyObject* args)
1209{
1210 PyObject* attrib;
1211
1212 PyObject* key;
1213 PyObject* value;
1214 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1215 return NULL;
1216
1217 if (!self->extra)
1218 element_new_extra(self, NULL);
1219
1220 attrib = element_get_attrib(self);
1221 if (!attrib)
1222 return NULL;
1223
1224 if (PyDict_SetItem(attrib, key, value) < 0)
1225 return NULL;
1226
1227 Py_RETURN_NONE;
1228}
1229
1230static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001231element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001233 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001234 int i;
1235 PyObject* old;
1236
1237 if (!self->extra || index < 0 || index >= self->extra->length) {
1238 PyErr_SetString(
1239 PyExc_IndexError,
1240 "child assignment index out of range");
1241 return -1;
1242 }
1243
1244 old = self->extra->children[index];
1245
1246 if (item) {
1247 Py_INCREF(item);
1248 self->extra->children[index] = item;
1249 } else {
1250 self->extra->length--;
1251 for (i = index; i < self->extra->length; i++)
1252 self->extra->children[i] = self->extra->children[i+1];
1253 }
1254
1255 Py_DECREF(old);
1256
1257 return 0;
1258}
1259
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001260static PyObject*
1261element_subscr(PyObject* self_, PyObject* item)
1262{
1263 ElementObject* self = (ElementObject*) self_;
1264
1265#if (PY_VERSION_HEX < 0x02050000)
1266 if (PyInt_Check(item) || PyLong_Check(item)) {
1267 long i = PyInt_AsLong(item);
1268#else
1269 if (PyIndex_Check(item)) {
1270 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1271#endif
1272
1273 if (i == -1 && PyErr_Occurred()) {
1274 return NULL;
1275 }
1276 if (i < 0 && self->extra)
1277 i += self->extra->length;
1278 return element_getitem(self_, i);
1279 }
1280 else if (PySlice_Check(item)) {
1281 Py_ssize_t start, stop, step, slicelen, cur, i;
1282 PyObject* list;
1283
1284 if (!self->extra)
1285 return PyList_New(0);
1286
1287 if (PySlice_GetIndicesEx((PySliceObject *)item,
1288 self->extra->length,
1289 &start, &stop, &step, &slicelen) < 0) {
1290 return NULL;
1291 }
1292
1293 if (slicelen <= 0)
1294 return PyList_New(0);
1295 else {
1296 list = PyList_New(slicelen);
1297 if (!list)
1298 return NULL;
1299
1300 for (cur = start, i = 0; i < slicelen;
1301 cur += step, i++) {
1302 PyObject* item = self->extra->children[cur];
1303 Py_INCREF(item);
1304 PyList_SET_ITEM(list, i, item);
1305 }
1306
1307 return list;
1308 }
1309 }
1310 else {
1311 PyErr_SetString(PyExc_TypeError,
1312 "element indices must be integers");
1313 return NULL;
1314 }
1315}
1316
1317static int
1318element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1319{
1320 ElementObject* self = (ElementObject*) self_;
1321
1322#if (PY_VERSION_HEX < 0x02050000)
1323 if (PyInt_Check(item) || PyLong_Check(item)) {
1324 long i = PyInt_AsLong(item);
1325#else
1326 if (PyIndex_Check(item)) {
1327 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1328#endif
1329
1330 if (i == -1 && PyErr_Occurred()) {
1331 return -1;
1332 }
1333 if (i < 0 && self->extra)
1334 i += self->extra->length;
1335 return element_setitem(self_, i, value);
1336 }
1337 else if (PySlice_Check(item)) {
1338 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1339
1340 PyObject* recycle = NULL;
1341 PyObject* seq = NULL;
1342
1343 if (!self->extra)
1344 element_new_extra(self, NULL);
1345
1346 if (PySlice_GetIndicesEx((PySliceObject *)item,
1347 self->extra->length,
1348 &start, &stop, &step, &slicelen) < 0) {
1349 return -1;
1350 }
1351
1352 if (value == NULL)
1353 newlen = 0;
1354 else {
1355 seq = PySequence_Fast(value, "");
1356 if (!seq) {
1357 PyErr_Format(
1358 PyExc_TypeError,
1359 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1360 );
1361 return -1;
1362 }
1363 newlen = PySequence_Size(seq);
1364 }
1365
1366 if (step != 1 && newlen != slicelen)
1367 {
1368 PyErr_Format(PyExc_ValueError,
1369#if (PY_VERSION_HEX < 0x02050000)
1370 "attempt to assign sequence of size %d "
1371 "to extended slice of size %d",
1372#else
1373 "attempt to assign sequence of size %zd "
1374 "to extended slice of size %zd",
1375#endif
1376 newlen, slicelen
1377 );
1378 return -1;
1379 }
1380
1381
1382 /* Resize before creating the recycle bin, to prevent refleaks. */
1383 if (newlen > slicelen) {
1384 if (element_resize(self, newlen - slicelen) < 0) {
1385 if (seq) {
1386 Py_DECREF(seq);
1387 }
1388 return -1;
1389 }
1390 }
1391
1392 if (slicelen > 0) {
1393 /* to avoid recursive calls to this method (via decref), move
1394 old items to the recycle bin here, and get rid of them when
1395 we're done modifying the element */
1396 recycle = PyList_New(slicelen);
1397 if (!recycle) {
1398 if (seq) {
1399 Py_DECREF(seq);
1400 }
1401 return -1;
1402 }
1403 for (cur = start, i = 0; i < slicelen;
1404 cur += step, i++)
1405 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1406 }
1407
1408 if (newlen < slicelen) {
1409 /* delete slice */
1410 for (i = stop; i < self->extra->length; i++)
1411 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1412 } else if (newlen > slicelen) {
1413 /* insert slice */
1414 for (i = self->extra->length-1; i >= stop; i--)
1415 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1416 }
1417
1418 /* replace the slice */
1419 for (cur = start, i = 0; i < newlen;
1420 cur += step, i++) {
1421 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1422 Py_INCREF(element);
1423 self->extra->children[cur] = element;
1424 }
1425
1426 self->extra->length += newlen - slicelen;
1427
1428 if (seq) {
1429 Py_DECREF(seq);
1430 }
1431
1432 /* discard the recycle bin, and everything in it */
1433 Py_XDECREF(recycle);
1434
1435 return 0;
1436 }
1437 else {
1438 PyErr_SetString(PyExc_TypeError,
1439 "element indices must be integers");
1440 return -1;
1441 }
1442}
1443
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001444static PyMethodDef element_methods[] = {
1445
1446 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1447
1448 {"get", (PyCFunction) element_get, METH_VARARGS},
1449 {"set", (PyCFunction) element_set, METH_VARARGS},
1450
1451 {"find", (PyCFunction) element_find, METH_VARARGS},
1452 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1453 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1454
1455 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001456 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1458 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1459
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001460 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1461 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1462 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1463
1464 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001465 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1466
1467 {"items", (PyCFunction) element_items, METH_VARARGS},
1468 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1469
1470 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1471
1472 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1473 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1474
1475 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1476 C objects correctly, so we have to fake it using a __reduce__-
1477 based hack (see the element_reduce implementation above for
1478 details). */
1479
1480 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1481 using a runtime test to figure out if we need to fake things
1482 or now (see the init code below). The following entry is
1483 enabled only if the hack is needed. */
1484
1485 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1486
1487 {NULL, NULL}
1488};
1489
1490static PyObject*
1491element_getattr(ElementObject* self, char* name)
1492{
1493 PyObject* res;
1494
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001495 /* handle common attributes first */
1496 if (strcmp(name, "tag") == 0) {
1497 res = self->tag;
1498 Py_INCREF(res);
1499 return res;
1500 } else if (strcmp(name, "text") == 0) {
1501 res = element_get_text(self);
1502 Py_INCREF(res);
1503 return res;
1504 }
1505
1506 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1508 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001509 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
1511 PyErr_Clear();
1512
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001513 /* less common attributes */
1514 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 res = element_get_tail(self);
1516 } else if (strcmp(name, "attrib") == 0) {
1517 if (!self->extra)
1518 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001519 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520 } else {
1521 PyErr_SetString(PyExc_AttributeError, name);
1522 return NULL;
1523 }
1524
1525 if (!res)
1526 return NULL;
1527
1528 Py_INCREF(res);
1529 return res;
1530}
1531
1532static int
1533element_setattr(ElementObject* self, const char* name, PyObject* value)
1534{
1535 if (value == NULL) {
1536 PyErr_SetString(
1537 PyExc_AttributeError,
1538 "can't delete element attributes"
1539 );
1540 return -1;
1541 }
1542
1543 if (strcmp(name, "tag") == 0) {
1544 Py_DECREF(self->tag);
1545 self->tag = value;
1546 Py_INCREF(self->tag);
1547 } else if (strcmp(name, "text") == 0) {
1548 Py_DECREF(JOIN_OBJ(self->text));
1549 self->text = value;
1550 Py_INCREF(self->text);
1551 } else if (strcmp(name, "tail") == 0) {
1552 Py_DECREF(JOIN_OBJ(self->tail));
1553 self->tail = value;
1554 Py_INCREF(self->tail);
1555 } else if (strcmp(name, "attrib") == 0) {
1556 if (!self->extra)
1557 element_new_extra(self, NULL);
1558 Py_DECREF(self->extra->attrib);
1559 self->extra->attrib = value;
1560 Py_INCREF(self->extra->attrib);
1561 } else {
1562 PyErr_SetString(PyExc_AttributeError, name);
1563 return -1;
1564 }
1565
1566 return 0;
1567}
1568
1569static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001570 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571 0, /* sq_concat */
1572 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001573 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001574 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001575 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001576 0,
1577};
1578
1579static PyMappingMethods element_as_mapping = {
1580 (lenfunc) element_length,
1581 (binaryfunc) element_subscr,
1582 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583};
1584
1585statichere PyTypeObject Element_Type = {
1586 PyObject_HEAD_INIT(NULL)
1587 0, "Element", sizeof(ElementObject), 0,
1588 /* methods */
1589 (destructor)element_dealloc, /* tp_dealloc */
1590 0, /* tp_print */
1591 (getattrfunc)element_getattr, /* tp_getattr */
1592 (setattrfunc)element_setattr, /* tp_setattr */
1593 0, /* tp_compare */
1594 (reprfunc)element_repr, /* tp_repr */
1595 0, /* tp_as_number */
1596 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001597 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598};
1599
1600/* ==================================================================== */
1601/* the tree builder type */
1602
1603typedef struct {
1604 PyObject_HEAD
1605
1606 PyObject* root; /* root node (first created node) */
1607
1608 ElementObject* this; /* current node */
1609 ElementObject* last; /* most recently created node */
1610
1611 PyObject* data; /* data collector (string or list), or NULL */
1612
1613 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001614 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615
1616 /* element tracing */
1617 PyObject* events; /* list of events, or NULL if not collecting */
1618 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1619 PyObject* end_event_obj;
1620 PyObject* start_ns_event_obj;
1621 PyObject* end_ns_event_obj;
1622
1623} TreeBuilderObject;
1624
1625staticforward PyTypeObject TreeBuilder_Type;
1626
Christian Heimese93237d2007-12-19 02:37:44 +00001627#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628
1629/* -------------------------------------------------------------------- */
1630/* constructor and destructor */
1631
1632LOCAL(PyObject*)
1633treebuilder_new(void)
1634{
1635 TreeBuilderObject* self;
1636
1637 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1638 if (self == NULL)
1639 return NULL;
1640
1641 self->root = NULL;
1642
1643 Py_INCREF(Py_None);
1644 self->this = (ElementObject*) Py_None;
1645
1646 Py_INCREF(Py_None);
1647 self->last = (ElementObject*) Py_None;
1648
1649 self->data = NULL;
1650
1651 self->stack = PyList_New(20);
1652 self->index = 0;
1653
1654 self->events = NULL;
1655 self->start_event_obj = self->end_event_obj = NULL;
1656 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1657
1658 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1659
1660 return (PyObject*) self;
1661}
1662
1663static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001664treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665{
1666 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1667 return NULL;
1668
1669 return treebuilder_new();
1670}
1671
1672static void
1673treebuilder_dealloc(TreeBuilderObject* self)
1674{
1675 Py_XDECREF(self->end_ns_event_obj);
1676 Py_XDECREF(self->start_ns_event_obj);
1677 Py_XDECREF(self->end_event_obj);
1678 Py_XDECREF(self->start_event_obj);
1679 Py_XDECREF(self->events);
1680 Py_DECREF(self->stack);
1681 Py_XDECREF(self->data);
1682 Py_DECREF(self->last);
1683 Py_DECREF(self->this);
1684 Py_XDECREF(self->root);
1685
1686 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1687
1688 PyObject_Del(self);
1689}
1690
1691/* -------------------------------------------------------------------- */
1692/* handlers */
1693
1694LOCAL(PyObject*)
1695treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1696 PyObject* standalone)
1697{
1698 Py_RETURN_NONE;
1699}
1700
1701LOCAL(PyObject*)
1702treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1703 PyObject* attrib)
1704{
1705 PyObject* node;
1706 PyObject* this;
1707
1708 if (self->data) {
1709 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001710 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711 self->last->text = JOIN_SET(
1712 self->data, PyList_CheckExact(self->data)
1713 );
1714 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001715 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716 self->last->tail = JOIN_SET(
1717 self->data, PyList_CheckExact(self->data)
1718 );
1719 }
1720 self->data = NULL;
1721 }
1722
1723 node = element_new(tag, attrib);
1724 if (!node)
1725 return NULL;
1726
1727 this = (PyObject*) self->this;
1728
1729 if (this != Py_None) {
1730 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001731 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732 } else {
1733 if (self->root) {
1734 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001735 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001736 "multiple elements on top level"
1737 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001738 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001739 }
1740 Py_INCREF(node);
1741 self->root = node;
1742 }
1743
1744 if (self->index < PyList_GET_SIZE(self->stack)) {
1745 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001746 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747 Py_INCREF(this);
1748 } else {
1749 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751 }
1752 self->index++;
1753
1754 Py_DECREF(this);
1755 Py_INCREF(node);
1756 self->this = (ElementObject*) node;
1757
1758 Py_DECREF(self->last);
1759 Py_INCREF(node);
1760 self->last = (ElementObject*) node;
1761
1762 if (self->start_event_obj) {
1763 PyObject* res;
1764 PyObject* action = self->start_event_obj;
1765 res = PyTuple_New(2);
1766 if (res) {
1767 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1768 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1769 PyList_Append(self->events, res);
1770 Py_DECREF(res);
1771 } else
1772 PyErr_Clear(); /* FIXME: propagate error */
1773 }
1774
1775 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001776
1777 error:
1778 Py_DECREF(node);
1779 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780}
1781
1782LOCAL(PyObject*)
1783treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1784{
1785 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001786 if (self->last == (ElementObject*) Py_None) {
1787 /* ignore calls to data before the first call to start */
1788 Py_RETURN_NONE;
1789 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001790 /* store the first item as is */
1791 Py_INCREF(data); self->data = data;
1792 } else {
1793 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001794 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1795 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001796 /* expat often generates single character data sections; handle
1797 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001798 Py_ssize_t size = PyString_GET_SIZE(self->data);
1799 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001800 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001801 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 } else if (PyList_CheckExact(self->data)) {
1803 if (PyList_Append(self->data, data) < 0)
1804 return NULL;
1805 } else {
1806 PyObject* list = PyList_New(2);
1807 if (!list)
1808 return NULL;
1809 PyList_SET_ITEM(list, 0, self->data);
1810 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1811 self->data = list;
1812 }
1813 }
1814
1815 Py_RETURN_NONE;
1816}
1817
1818LOCAL(PyObject*)
1819treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1820{
1821 PyObject* item;
1822
1823 if (self->data) {
1824 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001825 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826 self->last->text = JOIN_SET(
1827 self->data, PyList_CheckExact(self->data)
1828 );
1829 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001830 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001831 self->last->tail = JOIN_SET(
1832 self->data, PyList_CheckExact(self->data)
1833 );
1834 }
1835 self->data = NULL;
1836 }
1837
1838 if (self->index == 0) {
1839 PyErr_SetString(
1840 PyExc_IndexError,
1841 "pop from empty stack"
1842 );
1843 return NULL;
1844 }
1845
1846 self->index--;
1847
1848 item = PyList_GET_ITEM(self->stack, self->index);
1849 Py_INCREF(item);
1850
1851 Py_DECREF(self->last);
1852
1853 self->last = (ElementObject*) self->this;
1854 self->this = (ElementObject*) item;
1855
1856 if (self->end_event_obj) {
1857 PyObject* res;
1858 PyObject* action = self->end_event_obj;
1859 PyObject* node = (PyObject*) self->last;
1860 res = PyTuple_New(2);
1861 if (res) {
1862 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1863 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1864 PyList_Append(self->events, res);
1865 Py_DECREF(res);
1866 } else
1867 PyErr_Clear(); /* FIXME: propagate error */
1868 }
1869
1870 Py_INCREF(self->last);
1871 return (PyObject*) self->last;
1872}
1873
1874LOCAL(void)
1875treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001876 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001877{
1878 PyObject* res;
1879 PyObject* action;
1880 PyObject* parcel;
1881
1882 if (!self->events)
1883 return;
1884
1885 if (start) {
1886 if (!self->start_ns_event_obj)
1887 return;
1888 action = self->start_ns_event_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001889 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001890 if (!parcel)
1891 return;
1892 Py_INCREF(action);
1893 } else {
1894 if (!self->end_ns_event_obj)
1895 return;
1896 action = self->end_ns_event_obj;
1897 Py_INCREF(action);
1898 parcel = Py_None;
1899 Py_INCREF(parcel);
1900 }
1901
1902 res = PyTuple_New(2);
1903
1904 if (res) {
1905 PyTuple_SET_ITEM(res, 0, action);
1906 PyTuple_SET_ITEM(res, 1, parcel);
1907 PyList_Append(self->events, res);
1908 Py_DECREF(res);
1909 } else
1910 PyErr_Clear(); /* FIXME: propagate error */
1911}
1912
1913/* -------------------------------------------------------------------- */
1914/* methods (in alphabetical order) */
1915
1916static PyObject*
1917treebuilder_data(TreeBuilderObject* self, PyObject* args)
1918{
1919 PyObject* data;
1920 if (!PyArg_ParseTuple(args, "O:data", &data))
1921 return NULL;
1922
1923 return treebuilder_handle_data(self, data);
1924}
1925
1926static PyObject*
1927treebuilder_end(TreeBuilderObject* self, PyObject* args)
1928{
1929 PyObject* tag;
1930 if (!PyArg_ParseTuple(args, "O:end", &tag))
1931 return NULL;
1932
1933 return treebuilder_handle_end(self, tag);
1934}
1935
1936LOCAL(PyObject*)
1937treebuilder_done(TreeBuilderObject* self)
1938{
1939 PyObject* res;
1940
1941 /* FIXME: check stack size? */
1942
1943 if (self->root)
1944 res = self->root;
1945 else
1946 res = Py_None;
1947
1948 Py_INCREF(res);
1949 return res;
1950}
1951
1952static PyObject*
1953treebuilder_close(TreeBuilderObject* self, PyObject* args)
1954{
1955 if (!PyArg_ParseTuple(args, ":close"))
1956 return NULL;
1957
1958 return treebuilder_done(self);
1959}
1960
1961static PyObject*
1962treebuilder_start(TreeBuilderObject* self, PyObject* args)
1963{
1964 PyObject* tag;
1965 PyObject* attrib = Py_None;
1966 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1967 return NULL;
1968
1969 return treebuilder_handle_start(self, tag, attrib);
1970}
1971
1972static PyObject*
1973treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1974{
1975 PyObject* encoding;
1976 PyObject* standalone;
1977 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1978 return NULL;
1979
1980 return treebuilder_handle_xml(self, encoding, standalone);
1981}
1982
1983static PyMethodDef treebuilder_methods[] = {
1984 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1985 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1986 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1987 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1988 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1989 {NULL, NULL}
1990};
1991
1992static PyObject*
1993treebuilder_getattr(TreeBuilderObject* self, char* name)
1994{
1995 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1996}
1997
1998statichere PyTypeObject TreeBuilder_Type = {
1999 PyObject_HEAD_INIT(NULL)
2000 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2001 /* methods */
2002 (destructor)treebuilder_dealloc, /* tp_dealloc */
2003 0, /* tp_print */
2004 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2005};
2006
2007/* ==================================================================== */
2008/* the expat interface */
2009
2010#if defined(USE_EXPAT)
2011
2012#include "expat.h"
2013
2014#if defined(USE_PYEXPAT_CAPI)
2015#include "pyexpat.h"
2016static struct PyExpat_CAPI* expat_capi;
2017#define EXPAT(func) (expat_capi->func)
2018#else
2019#define EXPAT(func) (XML_##func)
2020#endif
2021
2022typedef struct {
2023 PyObject_HEAD
2024
2025 XML_Parser parser;
2026
2027 PyObject* target;
2028 PyObject* entity;
2029
2030 PyObject* names;
2031
2032 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002033
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002034 PyObject* handle_start;
2035 PyObject* handle_data;
2036 PyObject* handle_end;
2037
2038 PyObject* handle_comment;
2039 PyObject* handle_pi;
2040
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002041 PyObject* handle_close;
2042
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002043} XMLParserObject;
2044
2045staticforward PyTypeObject XMLParser_Type;
2046
2047/* helpers */
2048
2049#if defined(Py_USING_UNICODE)
2050LOCAL(int)
2051checkstring(const char* string, int size)
2052{
2053 int i;
2054
2055 /* check if an 8-bit string contains UTF-8 characters */
2056 for (i = 0; i < size; i++)
2057 if (string[i] & 0x80)
2058 return 1;
2059
2060 return 0;
2061}
2062#endif
2063
2064LOCAL(PyObject*)
2065makestring(const char* string, int size)
2066{
2067 /* convert a UTF-8 string to either a 7-bit ascii string or a
2068 Unicode string */
2069
2070#if defined(Py_USING_UNICODE)
2071 if (checkstring(string, size))
2072 return PyUnicode_DecodeUTF8(string, size, "strict");
2073#endif
2074
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002075 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002076}
2077
2078LOCAL(PyObject*)
2079makeuniversal(XMLParserObject* self, const char* string)
2080{
2081 /* convert a UTF-8 tag/attribute name from the expat parser
2082 to a universal name string */
2083
2084 int size = strlen(string);
2085 PyObject* key;
2086 PyObject* value;
2087
2088 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002089 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002090 if (!key)
2091 return NULL;
2092
2093 value = PyDict_GetItem(self->names, key);
2094
2095 if (value) {
2096 Py_INCREF(value);
2097 } else {
2098 /* new name. convert to universal name, and decode as
2099 necessary */
2100
2101 PyObject* tag;
2102 char* p;
2103 int i;
2104
2105 /* look for namespace separator */
2106 for (i = 0; i < size; i++)
2107 if (string[i] == '}')
2108 break;
2109 if (i != size) {
2110 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002111 tag = PyString_FromStringAndSize(NULL, size+1);
2112 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002113 p[0] = '{';
2114 memcpy(p+1, string, size);
2115 size++;
2116 } else {
2117 /* plain name; use key as tag */
2118 Py_INCREF(key);
2119 tag = key;
2120 }
2121
2122 /* decode universal name */
2123#if defined(Py_USING_UNICODE)
2124 /* inline makestring, to avoid duplicating the source string if
2125 it's not an utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002126 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127 if (checkstring(p, size)) {
2128 value = PyUnicode_DecodeUTF8(p, size, "strict");
2129 Py_DECREF(tag);
2130 if (!value) {
2131 Py_DECREF(key);
2132 return NULL;
2133 }
2134 } else
2135#endif
2136 value = tag; /* use tag as is */
2137
2138 /* add to names dictionary */
2139 if (PyDict_SetItem(self->names, key, value) < 0) {
2140 Py_DECREF(key);
2141 Py_DECREF(value);
2142 return NULL;
2143 }
2144 }
2145
2146 Py_DECREF(key);
2147 return value;
2148}
2149
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002150static void
2151expat_set_error(const char* message, int line, int column)
2152{
2153 PyObject *error;
2154 PyObject *position;
2155 char buffer[256];
2156
2157 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2158
2159 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2160 if (!error)
2161 return;
2162
2163 /* add position attribute */
2164 position = Py_BuildValue("(ii)", line, column);
2165 if (!position) {
2166 Py_DECREF(error);
2167 return;
2168 }
2169 if (PyObject_SetAttrString(error, "position", position) == -1) {
2170 Py_DECREF(error);
2171 Py_DECREF(position);
2172 return;
2173 }
2174 Py_DECREF(position);
2175
2176 PyErr_SetObject(elementtree_parseerror_obj, error);
2177 Py_DECREF(error);
2178}
2179
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180/* -------------------------------------------------------------------- */
2181/* handlers */
2182
2183static void
2184expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2185 int data_len)
2186{
2187 PyObject* key;
2188 PyObject* value;
2189 PyObject* res;
2190
2191 if (data_len < 2 || data_in[0] != '&')
2192 return;
2193
2194 key = makestring(data_in + 1, data_len - 2);
2195 if (!key)
2196 return;
2197
2198 value = PyDict_GetItem(self->entity, key);
2199
2200 if (value) {
2201 if (TreeBuilder_CheckExact(self->target))
2202 res = treebuilder_handle_data(
2203 (TreeBuilderObject*) self->target, value
2204 );
2205 else if (self->handle_data)
2206 res = PyObject_CallFunction(self->handle_data, "O", value);
2207 else
2208 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002209 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002210 } else if (!PyErr_Occurred()) {
2211 /* Report the first error, not the last */
2212 char message[128];
2213 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2214 expat_set_error(
2215 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216 EXPAT(GetErrorLineNumber)(self->parser),
2217 EXPAT(GetErrorColumnNumber)(self->parser)
2218 );
2219 }
2220
2221 Py_DECREF(key);
2222}
2223
2224static void
2225expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2226 const XML_Char **attrib_in)
2227{
2228 PyObject* res;
2229 PyObject* tag;
2230 PyObject* attrib;
2231 int ok;
2232
2233 /* tag name */
2234 tag = makeuniversal(self, tag_in);
2235 if (!tag)
2236 return; /* parser will look for errors */
2237
2238 /* attributes */
2239 if (attrib_in[0]) {
2240 attrib = PyDict_New();
2241 if (!attrib)
2242 return;
2243 while (attrib_in[0] && attrib_in[1]) {
2244 PyObject* key = makeuniversal(self, attrib_in[0]);
2245 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2246 if (!key || !value) {
2247 Py_XDECREF(value);
2248 Py_XDECREF(key);
2249 Py_DECREF(attrib);
2250 return;
2251 }
2252 ok = PyDict_SetItem(attrib, key, value);
2253 Py_DECREF(value);
2254 Py_DECREF(key);
2255 if (ok < 0) {
2256 Py_DECREF(attrib);
2257 return;
2258 }
2259 attrib_in += 2;
2260 }
2261 } else {
2262 Py_INCREF(Py_None);
2263 attrib = Py_None;
2264 }
2265
2266 if (TreeBuilder_CheckExact(self->target))
2267 /* shortcut */
2268 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2269 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002270 else if (self->handle_start) {
2271 if (attrib == Py_None) {
2272 Py_DECREF(attrib);
2273 attrib = PyDict_New();
2274 if (!attrib)
2275 return;
2276 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002278 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 res = NULL;
2280
2281 Py_DECREF(tag);
2282 Py_DECREF(attrib);
2283
2284 Py_XDECREF(res);
2285}
2286
2287static void
2288expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2289 int data_len)
2290{
2291 PyObject* data;
2292 PyObject* res;
2293
2294 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002295 if (!data)
2296 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297
2298 if (TreeBuilder_CheckExact(self->target))
2299 /* shortcut */
2300 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2301 else if (self->handle_data)
2302 res = PyObject_CallFunction(self->handle_data, "O", data);
2303 else
2304 res = NULL;
2305
2306 Py_DECREF(data);
2307
2308 Py_XDECREF(res);
2309}
2310
2311static void
2312expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2313{
2314 PyObject* tag;
2315 PyObject* res = NULL;
2316
2317 if (TreeBuilder_CheckExact(self->target))
2318 /* shortcut */
2319 /* the standard tree builder doesn't look at the end tag */
2320 res = treebuilder_handle_end(
2321 (TreeBuilderObject*) self->target, Py_None
2322 );
2323 else if (self->handle_end) {
2324 tag = makeuniversal(self, tag_in);
2325 if (tag) {
2326 res = PyObject_CallFunction(self->handle_end, "O", tag);
2327 Py_DECREF(tag);
2328 }
2329 }
2330
2331 Py_XDECREF(res);
2332}
2333
2334static void
2335expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2336 const XML_Char *uri)
2337{
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002338 PyObject* sprefix = NULL;
2339 PyObject* suri = NULL;
2340
2341 suri = makestring(uri, strlen(uri));
2342 if (!suri)
2343 return;
2344
2345 if (prefix)
2346 sprefix = makestring(prefix, strlen(prefix));
2347 else
2348 sprefix = PyString_FromStringAndSize("", 0);
2349 if (!sprefix) {
2350 Py_DECREF(suri);
2351 return;
2352 }
2353
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354 treebuilder_handle_namespace(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002355 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356 );
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002357
2358 Py_DECREF(sprefix);
2359 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360}
2361
2362static void
2363expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2364{
2365 treebuilder_handle_namespace(
2366 (TreeBuilderObject*) self->target, 0, NULL, NULL
2367 );
2368}
2369
2370static void
2371expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2372{
2373 PyObject* comment;
2374 PyObject* res;
2375
2376 if (self->handle_comment) {
2377 comment = makestring(comment_in, strlen(comment_in));
2378 if (comment) {
2379 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2380 Py_XDECREF(res);
2381 Py_DECREF(comment);
2382 }
2383 }
2384}
2385
2386static void
2387expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2388 const XML_Char* data_in)
2389{
2390 PyObject* target;
2391 PyObject* data;
2392 PyObject* res;
2393
2394 if (self->handle_pi) {
2395 target = makestring(target_in, strlen(target_in));
2396 data = makestring(data_in, strlen(data_in));
2397 if (target && data) {
2398 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2399 Py_XDECREF(res);
2400 Py_DECREF(data);
2401 Py_DECREF(target);
2402 } else {
2403 Py_XDECREF(data);
2404 Py_XDECREF(target);
2405 }
2406 }
2407}
2408
2409#if defined(Py_USING_UNICODE)
2410static int
2411expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2412 XML_Encoding *info)
2413{
2414 PyObject* u;
2415 Py_UNICODE* p;
2416 unsigned char s[256];
2417 int i;
2418
2419 memset(info, 0, sizeof(XML_Encoding));
2420
2421 for (i = 0; i < 256; i++)
2422 s[i] = i;
2423
Fredrik Lundhc3389992005-12-25 11:40:19 +00002424 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 if (!u)
2426 return XML_STATUS_ERROR;
2427
2428 if (PyUnicode_GET_SIZE(u) != 256) {
2429 Py_DECREF(u);
Eli Benderskyb6717012013-08-04 06:09:49 -07002430 PyErr_SetString(PyExc_ValueError,
2431 "multi-byte encodings are not supported");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432 return XML_STATUS_ERROR;
2433 }
2434
2435 p = PyUnicode_AS_UNICODE(u);
2436
2437 for (i = 0; i < 256; i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002438 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2439 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002440 else
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002441 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442 }
2443
2444 Py_DECREF(u);
2445
2446 return XML_STATUS_OK;
2447}
2448#endif
2449
2450/* -------------------------------------------------------------------- */
2451/* constructor and destructor */
2452
2453static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002454xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455{
2456 XMLParserObject* self;
2457 /* FIXME: does this need to be static? */
2458 static XML_Memory_Handling_Suite memory_handler;
2459
2460 PyObject* target = NULL;
2461 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002462 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2464 &target, &encoding))
2465 return NULL;
2466
2467#if defined(USE_PYEXPAT_CAPI)
2468 if (!expat_capi) {
2469 PyErr_SetString(
2470 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2471 );
2472 return NULL;
2473 }
2474#endif
2475
2476 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2477 if (self == NULL)
2478 return NULL;
2479
2480 self->entity = PyDict_New();
2481 if (!self->entity) {
2482 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002483 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 }
2485
2486 self->names = PyDict_New();
2487 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002488 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002490 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 }
2492
2493 memory_handler.malloc_fcn = PyObject_Malloc;
2494 memory_handler.realloc_fcn = PyObject_Realloc;
2495 memory_handler.free_fcn = PyObject_Free;
2496
2497 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2498 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002499 PyObject_Del(self->names);
2500 PyObject_Del(self->entity);
2501 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002503 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 }
2505
2506 /* setup target handlers */
2507 if (!target) {
2508 target = treebuilder_new();
2509 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002510 EXPAT(ParserFree)(self->parser);
2511 PyObject_Del(self->names);
2512 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002514 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515 }
2516 } else
2517 Py_INCREF(target);
2518 self->target = target;
2519
2520 self->handle_xml = PyObject_GetAttrString(target, "xml");
2521 self->handle_start = PyObject_GetAttrString(target, "start");
2522 self->handle_data = PyObject_GetAttrString(target, "data");
2523 self->handle_end = PyObject_GetAttrString(target, "end");
2524 self->handle_comment = PyObject_GetAttrString(target, "comment");
2525 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002526 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527
2528 PyErr_Clear();
2529
2530 /* configure parser */
2531 EXPAT(SetUserData)(self->parser, self);
2532 EXPAT(SetElementHandler)(
2533 self->parser,
2534 (XML_StartElementHandler) expat_start_handler,
2535 (XML_EndElementHandler) expat_end_handler
2536 );
2537 EXPAT(SetDefaultHandlerExpand)(
2538 self->parser,
2539 (XML_DefaultHandler) expat_default_handler
2540 );
2541 EXPAT(SetCharacterDataHandler)(
2542 self->parser,
2543 (XML_CharacterDataHandler) expat_data_handler
2544 );
2545 if (self->handle_comment)
2546 EXPAT(SetCommentHandler)(
2547 self->parser,
2548 (XML_CommentHandler) expat_comment_handler
2549 );
2550 if (self->handle_pi)
2551 EXPAT(SetProcessingInstructionHandler)(
2552 self->parser,
2553 (XML_ProcessingInstructionHandler) expat_pi_handler
2554 );
2555#if defined(Py_USING_UNICODE)
2556 EXPAT(SetUnknownEncodingHandler)(
2557 self->parser,
2558 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2559 );
2560#endif
2561
2562 ALLOC(sizeof(XMLParserObject), "create expatparser");
2563
2564 return (PyObject*) self;
2565}
2566
2567static void
2568xmlparser_dealloc(XMLParserObject* self)
2569{
2570 EXPAT(ParserFree)(self->parser);
2571
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002572 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 Py_XDECREF(self->handle_pi);
2574 Py_XDECREF(self->handle_comment);
2575 Py_XDECREF(self->handle_end);
2576 Py_XDECREF(self->handle_data);
2577 Py_XDECREF(self->handle_start);
2578 Py_XDECREF(self->handle_xml);
2579
2580 Py_DECREF(self->target);
2581 Py_DECREF(self->entity);
2582 Py_DECREF(self->names);
2583
2584 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2585
2586 PyObject_Del(self);
2587}
2588
2589/* -------------------------------------------------------------------- */
2590/* methods (in alphabetical order) */
2591
2592LOCAL(PyObject*)
2593expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2594{
2595 int ok;
2596
2597 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2598
2599 if (PyErr_Occurred())
2600 return NULL;
2601
2602 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002603 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2605 EXPAT(GetErrorLineNumber)(self->parser),
2606 EXPAT(GetErrorColumnNumber)(self->parser)
2607 );
2608 return NULL;
2609 }
2610
2611 Py_RETURN_NONE;
2612}
2613
2614static PyObject*
2615xmlparser_close(XMLParserObject* self, PyObject* args)
2616{
2617 /* end feeding data to parser */
2618
2619 PyObject* res;
2620 if (!PyArg_ParseTuple(args, ":close"))
2621 return NULL;
2622
2623 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002624 if (!res)
2625 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002627 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628 Py_DECREF(res);
2629 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002630 } if (self->handle_close) {
2631 Py_DECREF(res);
2632 return PyObject_CallFunction(self->handle_close, "");
2633 } else
2634 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635}
2636
2637static PyObject*
2638xmlparser_feed(XMLParserObject* self, PyObject* args)
2639{
2640 /* feed data to parser */
2641
2642 char* data;
2643 int data_len;
2644 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2645 return NULL;
2646
2647 return expat_parse(self, data, data_len, 0);
2648}
2649
2650static PyObject*
2651xmlparser_parse(XMLParserObject* self, PyObject* args)
2652{
2653 /* (internal) parse until end of input stream */
2654
2655 PyObject* reader;
2656 PyObject* buffer;
2657 PyObject* res;
2658
2659 PyObject* fileobj;
2660 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2661 return NULL;
2662
2663 reader = PyObject_GetAttrString(fileobj, "read");
2664 if (!reader)
2665 return NULL;
2666
2667 /* read from open file object */
2668 for (;;) {
2669
2670 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2671
2672 if (!buffer) {
2673 /* read failed (e.g. due to KeyboardInterrupt) */
2674 Py_DECREF(reader);
2675 return NULL;
2676 }
2677
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002678 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 Py_DECREF(buffer);
2680 break;
2681 }
2682
2683 res = expat_parse(
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002684 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 );
2686
2687 Py_DECREF(buffer);
2688
2689 if (!res) {
2690 Py_DECREF(reader);
2691 return NULL;
2692 }
2693 Py_DECREF(res);
2694
2695 }
2696
2697 Py_DECREF(reader);
2698
2699 res = expat_parse(self, "", 0, 1);
2700
2701 if (res && TreeBuilder_CheckExact(self->target)) {
2702 Py_DECREF(res);
2703 return treebuilder_done((TreeBuilderObject*) self->target);
2704 }
2705
2706 return res;
2707}
2708
2709static PyObject*
2710xmlparser_setevents(XMLParserObject* self, PyObject* args)
2711{
2712 /* activate element event reporting */
2713
Neal Norwitzc7074382006-06-12 02:06:17 +00002714 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715 TreeBuilderObject* target;
2716
2717 PyObject* events; /* event collector */
2718 PyObject* event_set = Py_None;
2719 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2720 &event_set))
2721 return NULL;
2722
2723 if (!TreeBuilder_CheckExact(self->target)) {
2724 PyErr_SetString(
2725 PyExc_TypeError,
2726 "event handling only supported for cElementTree.Treebuilder "
2727 "targets"
2728 );
2729 return NULL;
2730 }
2731
2732 target = (TreeBuilderObject*) self->target;
2733
2734 Py_INCREF(events);
2735 Py_XDECREF(target->events);
2736 target->events = events;
2737
2738 /* clear out existing events */
2739 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2740 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2741 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2742 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2743
2744 if (event_set == Py_None) {
2745 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002746 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747 Py_RETURN_NONE;
2748 }
2749
2750 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2751 goto error;
2752
2753 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2754 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2755 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002756 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002758 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 if (strcmp(event, "start") == 0) {
2760 Py_INCREF(item);
2761 target->start_event_obj = item;
2762 } else if (strcmp(event, "end") == 0) {
2763 Py_INCREF(item);
2764 Py_XDECREF(target->end_event_obj);
2765 target->end_event_obj = item;
2766 } else if (strcmp(event, "start-ns") == 0) {
2767 Py_INCREF(item);
2768 Py_XDECREF(target->start_ns_event_obj);
2769 target->start_ns_event_obj = item;
2770 EXPAT(SetNamespaceDeclHandler)(
2771 self->parser,
2772 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2773 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2774 );
2775 } else if (strcmp(event, "end-ns") == 0) {
2776 Py_INCREF(item);
2777 Py_XDECREF(target->end_ns_event_obj);
2778 target->end_ns_event_obj = item;
2779 EXPAT(SetNamespaceDeclHandler)(
2780 self->parser,
2781 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2782 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2783 );
2784 } else {
2785 PyErr_Format(
2786 PyExc_ValueError,
2787 "unknown event '%s'", event
2788 );
2789 return NULL;
2790 }
2791 }
2792
2793 Py_RETURN_NONE;
2794
2795 error:
2796 PyErr_SetString(
2797 PyExc_TypeError,
2798 "invalid event tuple"
2799 );
2800 return NULL;
2801}
2802
2803static PyMethodDef xmlparser_methods[] = {
2804 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2805 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2806 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2807 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2808 {NULL, NULL}
2809};
2810
2811static PyObject*
2812xmlparser_getattr(XMLParserObject* self, char* name)
2813{
2814 PyObject* res;
2815
2816 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2817 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002818 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002819
2820 PyErr_Clear();
2821
2822 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002823 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002825 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 else if (strcmp(name, "version") == 0) {
2827 char buffer[100];
2828 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2829 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002830 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 } else {
2832 PyErr_SetString(PyExc_AttributeError, name);
2833 return NULL;
2834 }
2835
2836 Py_INCREF(res);
2837 return res;
2838}
2839
2840statichere PyTypeObject XMLParser_Type = {
2841 PyObject_HEAD_INIT(NULL)
2842 0, "XMLParser", sizeof(XMLParserObject), 0,
2843 /* methods */
2844 (destructor)xmlparser_dealloc, /* tp_dealloc */
2845 0, /* tp_print */
2846 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2847};
2848
2849#endif
2850
2851/* ==================================================================== */
2852/* python module interface */
2853
2854static PyMethodDef _functions[] = {
2855 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2856 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2857 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2858#if defined(USE_EXPAT)
2859 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2860 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2861#endif
2862 {NULL, NULL}
2863};
2864
2865DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002866init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867{
2868 PyObject* m;
2869 PyObject* g;
2870 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871
2872 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002873 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002874#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002875 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876#endif
2877
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002878 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002879 if (!m)
2880 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002881
2882 /* python glue code */
2883
2884 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002885 if (!g)
2886 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887
2888 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2889
2890 bootstrap = (
2891
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892 "from copy import copy, deepcopy\n"
2893
2894 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002895 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896 "except ImportError:\n"
2897 " import ElementTree\n"
2898 "ET = ElementTree\n"
2899 "del ElementTree\n"
2900
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002901 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902
2903 "try:\n" /* check if copy works as is */
2904 " copy(cElementTree.Element('x'))\n"
2905 "except:\n"
2906 " def copyelement(elem):\n"
2907 " return elem\n"
2908
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002909 "class CommentProxy:\n"
2910 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 " element = cElementTree.Element(ET.Comment)\n"
2912 " element.text = text\n"
2913 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002914 " def __cmp__(self, other):\n"
2915 " return cmp(ET.Comment, other)\n"
2916 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917
2918 "class ElementTree(ET.ElementTree):\n" /* public */
2919 " def parse(self, source, parser=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002920 " close_source = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 " if not hasattr(source, 'read'):\n"
2922 " source = open(source, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002923 " close_source = False\n"
2924 " try:\n"
2925 " if parser is not None:\n"
2926 " while 1:\n"
2927 " data = source.read(65536)\n"
2928 " if not data:\n"
2929 " break\n"
2930 " parser.feed(data)\n"
2931 " self._root = parser.close()\n"
2932 " else:\n"
2933 " parser = cElementTree.XMLParser()\n"
2934 " self._root = parser._parse(source)\n"
2935 " return self._root\n"
2936 " finally:\n"
2937 " if close_source:\n"
2938 " source.close()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 "cElementTree.ElementTree = ElementTree\n"
2940
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002941 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942 " if tag == '*':\n"
2943 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 " if tag is None or node.tag == tag:\n"
2945 " yield node\n"
2946 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002947 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002949
2950 "def itertext(node):\n" /* helper */
2951 " if node.text:\n"
2952 " yield node.text\n"
2953 " for e in node:\n"
2954 " for s in e.itertext():\n"
2955 " yield s\n"
2956 " if e.tail:\n"
2957 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958
2959 "def parse(source, parser=None):\n" /* public */
2960 " tree = ElementTree()\n"
2961 " tree.parse(source, parser)\n"
2962 " return tree\n"
2963 "cElementTree.parse = parse\n"
2964
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 "class iterparse(object):\n"
2966 " root = None\n"
2967 " def __init__(self, file, events=None):\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002968 " self._close_file = False\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 " if not hasattr(file, 'read'):\n"
2970 " file = open(file, 'rb')\n"
Florent Xicluna67d5d0e2011-10-29 03:38:56 +02002971 " self._close_file = True\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002973 " self._events = []\n"
2974 " self._index = 0\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002975 " self._error = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002976 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002978 " self._parser = cElementTree.XMLParser(b)\n"
2979 " self._parser._setevents(self._events, events)\n"
2980 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002982 " try:\n"
2983 " item = self._events[self._index]\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002984 " self._index += 1\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002985 " return item\n"
Florent Xicluna0965ee22011-11-01 23:34:41 +01002986 " except IndexError:\n"
2987 " pass\n"
2988 " if self._error:\n"
2989 " e = self._error\n"
2990 " self._error = None\n"
2991 " raise e\n"
2992 " if self._parser is None:\n"
2993 " self.root = self._root\n"
2994 " if self._close_file:\n"
2995 " self._file.close()\n"
2996 " raise StopIteration\n"
2997 " # load event buffer\n"
2998 " del self._events[:]\n"
2999 " self._index = 0\n"
3000 " data = self._file.read(16384)\n"
3001 " if data:\n"
3002 " try:\n"
3003 " self._parser.feed(data)\n"
3004 " except SyntaxError as exc:\n"
3005 " self._error = exc\n"
3006 " else:\n"
3007 " self._root = self._parser.close()\n"
3008 " self._parser = None\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003009 " def __iter__(self):\n"
3010 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003013 "class PIProxy:\n"
3014 " def __call__(self, target, text=None):\n"
3015 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 " element.text = target\n"
3017 " if text:\n"
3018 " element.text = element.text + ' ' + text\n"
3019 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003020 " def __cmp__(self, other):\n"
3021 " return cmp(ET.PI, other)\n"
3022 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023
3024 "def XML(text):\n" /* public */
3025 " parser = cElementTree.XMLParser()\n"
3026 " parser.feed(text)\n"
3027 " return parser.close()\n"
3028 "cElementTree.XML = cElementTree.fromstring = XML\n"
3029
3030 "def XMLID(text):\n" /* public */
3031 " tree = XML(text)\n"
3032 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003033 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 " id = elem.get('id')\n"
3035 " if id:\n"
3036 " ids[id] = elem\n"
3037 " return tree, ids\n"
3038 "cElementTree.XMLID = XMLID\n"
3039
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003040 "try:\n"
3041 " register_namespace = ET.register_namespace\n"
3042 "except AttributeError:\n"
3043 " def register_namespace(prefix, uri):\n"
3044 " ET._namespace_map[uri] = prefix\n"
3045 "cElementTree.register_namespace = register_namespace\n"
3046
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047 "cElementTree.dump = ET.dump\n"
3048 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3049 "cElementTree.iselement = ET.iselement\n"
3050 "cElementTree.QName = ET.QName\n"
3051 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003052 "cElementTree.fromstringlist = ET.fromstringlist\n"
3053 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054 "cElementTree.VERSION = '" VERSION "'\n"
3055 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056
3057 );
3058
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003059 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3060 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061
3062 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3063
3064 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3065 if (elementtree_copyelement_obj) {
3066 /* reduce hack needed; enable reduce method */
3067 PyMethodDef* mp;
3068 for (mp = element_methods; mp->ml_name; mp++)
3069 if (mp->ml_meth == (PyCFunction) element_reduce) {
3070 mp->ml_name = "__reduce__";
3071 break;
3072 }
3073 } else
3074 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003075
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003077 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3078 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
3080#if defined(USE_PYEXPAT_CAPI)
3081 /* link against pyexpat, if possible */
Larry Hastings402b73f2010-03-25 00:54:54 +00003082 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003083 if (expat_capi) {
3084 /* check that it's usable */
3085 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3086 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3087 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3088 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3089 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3090 expat_capi = NULL;
3091 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092#endif
3093
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003094 elementtree_parseerror_obj = PyErr_NewException(
3095 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3096 );
3097 Py_INCREF(elementtree_parseerror_obj);
3098 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099}