blob: 62aee8504da57c3c8e698cf5e6f4d07b98552ca6 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xicluna3e8c1892010-03-11 14:36:19 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xicluna3e8c1892010-03-11 14:36:19 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xicluna3e8c1892010-03-11 14:36:19 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
Fredrik Lundhdc075b92006-08-16 16:47:07 +000052#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000053
54/* -------------------------------------------------------------------- */
55/* configuration */
56
57/* Leave defined to include the expat-based XMLParser type */
58#define USE_EXPAT
59
Florent Xicluna3e8c1892010-03-11 14:36:19 +000060/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* #define USE_PYEXPAT_CAPI */
62
63/* An element can hold this many children without extra memory
64 allocations. */
65#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
73 eight bytes. For the current version of cElementTree, this means
74 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
79#if 0
80static int memory = 0;
81#define ALLOC(size, comment)\
82do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
83#define RELEASE(size, comment)\
84do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85#else
86#define ALLOC(size, comment)
87#define RELEASE(size, comment)
88#endif
89
90/* compiler tweaks */
91#if defined(_MSC_VER)
92#define LOCAL(type) static __inline type __fastcall
93#else
94#define LOCAL(type) static type
95#endif
96
97/* compatibility macros */
Florent Xicluna3e8c1892010-03-11 14:36:19 +000098#if (PY_VERSION_HEX < 0x02060000)
99#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
100#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
101#endif
102
Martin v. Löwis18e16552006-02-15 17:27:45 +0000103#if (PY_VERSION_HEX < 0x02050000)
104typedef int Py_ssize_t;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000105#define lenfunc inquiry
Martin v. Löwis18e16552006-02-15 17:27:45 +0000106#endif
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108#if (PY_VERSION_HEX < 0x02040000)
109#define PyDict_CheckExact PyDict_Check
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111#if !defined(Py_RETURN_NONE)
112#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
113#endif
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000114#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000115
116/* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
119 info. */
120#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
123
124/* glue functions (see the init function for details) */
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000125static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126static PyObject* elementtree_copyelement_obj;
127static PyObject* elementtree_deepcopy_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000128static PyObject* elementtree_iter_obj;
129static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130static PyObject* elementpath_obj;
131
132/* helpers */
133
134LOCAL(PyObject*)
135deepcopy(PyObject* object, PyObject* memo)
136{
137 /* do a deep copy of the given object */
138
139 PyObject* args;
140 PyObject* result;
141
142 if (!elementtree_deepcopy_obj) {
143 PyErr_SetString(
144 PyExc_RuntimeError,
145 "deepcopy helper not found"
146 );
147 return NULL;
148 }
149
150 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000151 if (!args)
152 return NULL;
153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
155 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156
157 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158
159 Py_DECREF(args);
160
161 return result;
162}
163
164LOCAL(PyObject*)
165list_join(PyObject* list)
166{
167 /* join list elements (destroying the list in the process) */
168
169 PyObject* joiner;
170 PyObject* function;
171 PyObject* args;
172 PyObject* result;
173
174 switch (PyList_GET_SIZE(list)) {
175 case 0:
176 Py_DECREF(list);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000177 return PyString_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 case 1:
179 result = PyList_GET_ITEM(list, 0);
180 Py_INCREF(result);
181 Py_DECREF(list);
182 return result;
183 }
184
185 /* two or more elements: slice out a suitable separator from the
186 first member, and use that to join the entire list */
187
188 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
189 if (!joiner)
190 return NULL;
191
192 function = PyObject_GetAttrString(joiner, "join");
193 if (!function) {
194 Py_DECREF(joiner);
195 return NULL;
196 }
197
198 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000199 if (!args)
200 return NULL;
201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202 PyTuple_SET_ITEM(args, 0, list);
203
204 result = PyObject_CallObject(function, args);
205
206 Py_DECREF(args); /* also removes list */
207 Py_DECREF(function);
208 Py_DECREF(joiner);
209
210 return result;
211}
212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213/* -------------------------------------------------------------------- */
214/* the element type */
215
216typedef struct {
217
218 /* attributes (a dictionary object), or None if no attributes */
219 PyObject* attrib;
220
221 /* child elements */
222 int length; /* actual number of items */
223 int allocated; /* allocated items */
224
225 /* this either points to _children or to a malloced buffer */
226 PyObject* *children;
227
228 PyObject* _children[STATIC_CHILDREN];
229
230} ElementObjectExtra;
231
232typedef struct {
233 PyObject_HEAD
234
235 /* element tag (a string). */
236 PyObject* tag;
237
238 /* text before first child. note that this is a tagged pointer;
239 use JOIN_OBJ to get the object pointer. the join flag is used
240 to distinguish lists created by the tree builder from lists
241 assigned to the attribute by application code; the former
242 should be joined before being returned to the user, the latter
243 should be left intact. */
244 PyObject* text;
245
246 /* text after this element, in parent. note that this is a tagged
247 pointer; use JOIN_OBJ to get the object pointer. */
248 PyObject* tail;
249
250 ElementObjectExtra* extra;
251
252} ElementObject;
253
254staticforward PyTypeObject Element_Type;
255
Christian Heimese93237d2007-12-19 02:37:44 +0000256#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000257
258/* -------------------------------------------------------------------- */
259/* element constructor and destructor */
260
261LOCAL(int)
262element_new_extra(ElementObject* self, PyObject* attrib)
263{
264 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
265 if (!self->extra)
266 return -1;
267
268 if (!attrib)
269 attrib = Py_None;
270
271 Py_INCREF(attrib);
272 self->extra->attrib = attrib;
273
274 self->extra->length = 0;
275 self->extra->allocated = STATIC_CHILDREN;
276 self->extra->children = self->extra->_children;
277
278 return 0;
279}
280
281LOCAL(void)
282element_dealloc_extra(ElementObject* self)
283{
284 int i;
285
286 Py_DECREF(self->extra->attrib);
287
288 for (i = 0; i < self->extra->length; i++)
289 Py_DECREF(self->extra->children[i]);
290
291 if (self->extra->children != self->extra->_children)
292 PyObject_Free(self->extra->children);
293
294 PyObject_Free(self->extra);
295}
296
297LOCAL(PyObject*)
298element_new(PyObject* tag, PyObject* attrib)
299{
300 ElementObject* self;
301
302 self = PyObject_New(ElementObject, &Element_Type);
303 if (self == NULL)
304 return NULL;
305
306 /* use None for empty dictionaries */
307 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
308 attrib = Py_None;
309
310 self->extra = NULL;
311
312 if (attrib != Py_None) {
313
Neal Norwitzc6a989a2006-05-10 06:57:58 +0000314 if (element_new_extra(self, attrib) < 0) {
315 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000316 return NULL;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000317 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000318
319 self->extra->length = 0;
320 self->extra->allocated = STATIC_CHILDREN;
321 self->extra->children = self->extra->_children;
322
323 }
324
325 Py_INCREF(tag);
326 self->tag = tag;
327
328 Py_INCREF(Py_None);
329 self->text = Py_None;
330
331 Py_INCREF(Py_None);
332 self->tail = Py_None;
333
334 ALLOC(sizeof(ElementObject), "create element");
335
336 return (PyObject*) self;
337}
338
339LOCAL(int)
340element_resize(ElementObject* self, int extra)
341{
342 int size;
343 PyObject* *children;
344
345 /* make sure self->children can hold the given number of extra
346 elements. set an exception and return -1 if allocation failed */
347
348 if (!self->extra)
349 element_new_extra(self, NULL);
350
351 size = self->extra->length + extra;
352
353 if (size > self->extra->allocated) {
354 /* use Python 2.4's list growth strategy */
355 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000356 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
357 * which needs at least 4 bytes.
358 * Although it's a false alarm always assume at least one child to
359 * be safe.
360 */
361 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000362 if (self->extra->children != self->extra->_children) {
Christian Heimes87dcf3d2008-01-18 08:04:57 +0000363 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
364 * "children", which needs at least 4 bytes. Although it's a
365 * false alarm always assume at least one child to be safe.
366 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000367 children = PyObject_Realloc(self->extra->children,
368 size * sizeof(PyObject*));
369 if (!children)
370 goto nomemory;
371 } else {
372 children = PyObject_Malloc(size * sizeof(PyObject*));
373 if (!children)
374 goto nomemory;
375 /* copy existing children from static area to malloc buffer */
376 memcpy(children, self->extra->children,
377 self->extra->length * sizeof(PyObject*));
378 }
379 self->extra->children = children;
380 self->extra->allocated = size;
381 }
382
383 return 0;
384
385 nomemory:
386 PyErr_NoMemory();
387 return -1;
388}
389
390LOCAL(int)
391element_add_subelement(ElementObject* self, PyObject* element)
392{
393 /* add a child element to a parent */
394
395 if (element_resize(self, 1) < 0)
396 return -1;
397
398 Py_INCREF(element);
399 self->extra->children[self->extra->length] = element;
400
401 self->extra->length++;
402
403 return 0;
404}
405
406LOCAL(PyObject*)
407element_get_attrib(ElementObject* self)
408{
409 /* return borrowed reference to attrib dictionary */
410 /* note: this function assumes that the extra section exists */
411
412 PyObject* res = self->extra->attrib;
413
414 if (res == Py_None) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000415 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000416 /* create missing dictionary */
417 res = PyDict_New();
418 if (!res)
419 return NULL;
420 self->extra->attrib = res;
421 }
422
423 return res;
424}
425
426LOCAL(PyObject*)
427element_get_text(ElementObject* self)
428{
429 /* return borrowed reference to text attribute */
430
431 PyObject* res = self->text;
432
433 if (JOIN_GET(res)) {
434 res = JOIN_OBJ(res);
435 if (PyList_CheckExact(res)) {
436 res = list_join(res);
437 if (!res)
438 return NULL;
439 self->text = res;
440 }
441 }
442
443 return res;
444}
445
446LOCAL(PyObject*)
447element_get_tail(ElementObject* self)
448{
449 /* return borrowed reference to text attribute */
450
451 PyObject* res = self->tail;
452
453 if (JOIN_GET(res)) {
454 res = JOIN_OBJ(res);
455 if (PyList_CheckExact(res)) {
456 res = list_join(res);
457 if (!res)
458 return NULL;
459 self->tail = res;
460 }
461 }
462
463 return res;
464}
465
466static PyObject*
467element(PyObject* self, PyObject* args, PyObject* kw)
468{
469 PyObject* elem;
470
471 PyObject* tag;
472 PyObject* attrib = NULL;
473 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
474 &PyDict_Type, &attrib))
475 return NULL;
476
477 if (attrib || kw) {
478 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
479 if (!attrib)
480 return NULL;
481 if (kw)
482 PyDict_Update(attrib, kw);
483 } else {
484 Py_INCREF(Py_None);
485 attrib = Py_None;
486 }
487
488 elem = element_new(tag, attrib);
489
490 Py_DECREF(attrib);
491
492 return elem;
493}
494
495static PyObject*
496subelement(PyObject* self, PyObject* args, PyObject* kw)
497{
498 PyObject* elem;
499
500 ElementObject* parent;
501 PyObject* tag;
502 PyObject* attrib = NULL;
503 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
504 &Element_Type, &parent, &tag,
505 &PyDict_Type, &attrib))
506 return NULL;
507
508 if (attrib || kw) {
509 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
510 if (!attrib)
511 return NULL;
512 if (kw)
513 PyDict_Update(attrib, kw);
514 } else {
515 Py_INCREF(Py_None);
516 attrib = Py_None;
517 }
518
519 elem = element_new(tag, attrib);
520
521 Py_DECREF(attrib);
522
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000523 if (element_add_subelement(parent, elem) < 0) {
524 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000527
528 return elem;
529}
530
531static void
532element_dealloc(ElementObject* self)
533{
534 if (self->extra)
535 element_dealloc_extra(self);
536
537 /* discard attributes */
538 Py_DECREF(self->tag);
539 Py_DECREF(JOIN_OBJ(self->text));
540 Py_DECREF(JOIN_OBJ(self->tail));
541
542 RELEASE(sizeof(ElementObject), "destroy element");
543
544 PyObject_Del(self);
545}
546
547/* -------------------------------------------------------------------- */
548/* methods (in alphabetical order) */
549
550static PyObject*
551element_append(ElementObject* self, PyObject* args)
552{
553 PyObject* element;
554 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
555 return NULL;
556
557 if (element_add_subelement(self, element) < 0)
558 return NULL;
559
560 Py_RETURN_NONE;
561}
562
563static PyObject*
564element_clear(ElementObject* self, PyObject* args)
565{
566 if (!PyArg_ParseTuple(args, ":clear"))
567 return NULL;
568
569 if (self->extra) {
570 element_dealloc_extra(self);
571 self->extra = NULL;
572 }
573
574 Py_INCREF(Py_None);
575 Py_DECREF(JOIN_OBJ(self->text));
576 self->text = Py_None;
577
578 Py_INCREF(Py_None);
579 Py_DECREF(JOIN_OBJ(self->tail));
580 self->tail = Py_None;
581
582 Py_RETURN_NONE;
583}
584
585static PyObject*
586element_copy(ElementObject* self, PyObject* args)
587{
588 int i;
589 ElementObject* element;
590
591 if (!PyArg_ParseTuple(args, ":__copy__"))
592 return NULL;
593
594 element = (ElementObject*) element_new(
595 self->tag, (self->extra) ? self->extra->attrib : Py_None
596 );
597 if (!element)
598 return NULL;
599
600 Py_DECREF(JOIN_OBJ(element->text));
601 element->text = self->text;
602 Py_INCREF(JOIN_OBJ(element->text));
603
604 Py_DECREF(JOIN_OBJ(element->tail));
605 element->tail = self->tail;
606 Py_INCREF(JOIN_OBJ(element->tail));
607
608 if (self->extra) {
609
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 if (element_resize(element, self->extra->length) < 0) {
611 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000613 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614
615 for (i = 0; i < self->extra->length; i++) {
616 Py_INCREF(self->extra->children[i]);
617 element->extra->children[i] = self->extra->children[i];
618 }
619
620 element->extra->length = self->extra->length;
621
622 }
623
624 return (PyObject*) element;
625}
626
627static PyObject*
628element_deepcopy(ElementObject* self, PyObject* args)
629{
630 int i;
631 ElementObject* element;
632 PyObject* tag;
633 PyObject* attrib;
634 PyObject* text;
635 PyObject* tail;
636 PyObject* id;
637
638 PyObject* memo;
639 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
640 return NULL;
641
642 tag = deepcopy(self->tag, memo);
643 if (!tag)
644 return NULL;
645
646 if (self->extra) {
647 attrib = deepcopy(self->extra->attrib, memo);
648 if (!attrib) {
649 Py_DECREF(tag);
650 return NULL;
651 }
652 } else {
653 Py_INCREF(Py_None);
654 attrib = Py_None;
655 }
656
657 element = (ElementObject*) element_new(tag, attrib);
658
659 Py_DECREF(tag);
660 Py_DECREF(attrib);
661
662 if (!element)
663 return NULL;
664
665 text = deepcopy(JOIN_OBJ(self->text), memo);
666 if (!text)
667 goto error;
668 Py_DECREF(element->text);
669 element->text = JOIN_SET(text, JOIN_GET(self->text));
670
671 tail = deepcopy(JOIN_OBJ(self->tail), memo);
672 if (!tail)
673 goto error;
674 Py_DECREF(element->tail);
675 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
676
677 if (self->extra) {
678
679 if (element_resize(element, self->extra->length) < 0)
680 goto error;
681
682 for (i = 0; i < self->extra->length; i++) {
683 PyObject* child = deepcopy(self->extra->children[i], memo);
684 if (!child) {
685 element->extra->length = i;
686 goto error;
687 }
688 element->extra->children[i] = child;
689 }
690
691 element->extra->length = self->extra->length;
692
693 }
694
695 /* add object to memo dictionary (so deepcopy won't visit it again) */
696 id = PyInt_FromLong((Py_uintptr_t) self);
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000697 if (!id)
698 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 i = PyDict_SetItem(memo, id, (PyObject*) element);
701
702 Py_DECREF(id);
703
704 if (i < 0)
705 goto error;
706
707 return (PyObject*) element;
708
709 error:
710 Py_DECREF(element);
711 return NULL;
712}
713
714LOCAL(int)
715checkpath(PyObject* tag)
716{
Neal Norwitzc7074382006-06-12 02:06:17 +0000717 Py_ssize_t i;
718 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 /* check if a tag contains an xpath character */
721
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000722#define PATHCHAR(ch) \
723 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724
725#if defined(Py_USING_UNICODE)
726 if (PyUnicode_Check(tag)) {
727 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
728 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
729 if (p[i] == '{')
730 check = 0;
731 else if (p[i] == '}')
732 check = 1;
733 else if (check && PATHCHAR(p[i]))
734 return 1;
735 }
736 return 0;
737 }
738#endif
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000739 if (PyString_Check(tag)) {
740 char *p = PyString_AS_STRING(tag);
741 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 if (p[i] == '{')
743 check = 0;
744 else if (p[i] == '}')
745 check = 1;
746 else if (check && PATHCHAR(p[i]))
747 return 1;
748 }
749 return 0;
750 }
751
752 return 1; /* unknown type; might be path expression */
753}
754
755static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000756element_extend(ElementObject* self, PyObject* args)
757{
758 PyObject* seq;
759 Py_ssize_t i, seqlen = 0;
760
761 PyObject* seq_in;
762 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
763 return NULL;
764
765 seq = PySequence_Fast(seq_in, "");
766 if (!seq) {
767 PyErr_Format(
768 PyExc_TypeError,
769 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
770 );
771 return NULL;
772 }
773
774 seqlen = PySequence_Size(seq);
775 for (i = 0; i < seqlen; i++) {
776 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
777 if (element_add_subelement(self, element) < 0) {
778 Py_DECREF(seq);
779 return NULL;
780 }
781 }
782
783 Py_DECREF(seq);
784
785 Py_RETURN_NONE;
786}
787
788static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789element_find(ElementObject* self, PyObject* args)
790{
791 int i;
792
793 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000794 PyObject* namespaces = Py_None;
795 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 return NULL;
797
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000798 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000800 elementpath_obj, "find", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 );
802
803 if (!self->extra)
804 Py_RETURN_NONE;
805
806 for (i = 0; i < self->extra->length; i++) {
807 PyObject* item = self->extra->children[i];
808 if (Element_CheckExact(item) &&
809 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
810 Py_INCREF(item);
811 return item;
812 }
813 }
814
815 Py_RETURN_NONE;
816}
817
818static PyObject*
819element_findtext(ElementObject* self, PyObject* args)
820{
821 int i;
822
823 PyObject* tag;
824 PyObject* default_value = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000825 PyObject* namespaces = Py_None;
826 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827 return NULL;
828
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000829 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000831 elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832 );
833
834 if (!self->extra) {
835 Py_INCREF(default_value);
836 return default_value;
837 }
838
839 for (i = 0; i < self->extra->length; i++) {
840 ElementObject* item = (ElementObject*) self->extra->children[i];
841 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
842 PyObject* text = element_get_text(item);
843 if (text == Py_None)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000844 return PyString_FromString("");
Neal Norwitz6f5ff3f2006-08-12 01:43:40 +0000845 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 return text;
847 }
848 }
849
850 Py_INCREF(default_value);
851 return default_value;
852}
853
854static PyObject*
855element_findall(ElementObject* self, PyObject* args)
856{
857 int i;
858 PyObject* out;
859
860 PyObject* tag;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000861 PyObject* namespaces = Py_None;
862 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863 return NULL;
864
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000865 if (checkpath(tag) || namespaces != Py_None)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000866 return PyObject_CallMethod(
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000867 elementpath_obj, "findall", "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 );
869
870 out = PyList_New(0);
871 if (!out)
872 return NULL;
873
874 if (!self->extra)
875 return out;
876
877 for (i = 0; i < self->extra->length; i++) {
878 PyObject* item = self->extra->children[i];
879 if (Element_CheckExact(item) &&
880 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
881 if (PyList_Append(out, item) < 0) {
882 Py_DECREF(out);
883 return NULL;
884 }
885 }
886 }
887
888 return out;
889}
890
891static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000892element_iterfind(ElementObject* self, PyObject* args)
893{
894 PyObject* tag;
895 PyObject* namespaces = Py_None;
896 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
897 return NULL;
898
899 return PyObject_CallMethod(
900 elementpath_obj, "iterfind", "OOO", self, tag, namespaces
901 );
902}
903
904static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905element_get(ElementObject* self, PyObject* args)
906{
907 PyObject* value;
908
909 PyObject* key;
910 PyObject* default_value = Py_None;
911 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
912 return NULL;
913
914 if (!self->extra || self->extra->attrib == Py_None)
915 value = default_value;
916 else {
917 value = PyDict_GetItem(self->extra->attrib, key);
918 if (!value)
919 value = default_value;
920 }
921
922 Py_INCREF(value);
923 return value;
924}
925
926static PyObject*
927element_getchildren(ElementObject* self, PyObject* args)
928{
929 int i;
930 PyObject* list;
931
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000932 /* FIXME: report as deprecated? */
933
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000934 if (!PyArg_ParseTuple(args, ":getchildren"))
935 return NULL;
936
937 if (!self->extra)
938 return PyList_New(0);
939
940 list = PyList_New(self->extra->length);
941 if (!list)
942 return NULL;
943
944 for (i = 0; i < self->extra->length; i++) {
945 PyObject* item = self->extra->children[i];
946 Py_INCREF(item);
947 PyList_SET_ITEM(list, i, item);
948 }
949
950 return list;
951}
952
953static PyObject*
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000954element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000955{
956 PyObject* result;
957
958 PyObject* tag = Py_None;
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000959 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 return NULL;
961
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000962 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 PyErr_SetString(
964 PyExc_RuntimeError,
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000965 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000966 );
967 return NULL;
968 }
969
970 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000971 if (!args)
972 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +0000973
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
975 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
976
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000977 result = PyObject_CallObject(elementtree_iter_obj, args);
978
979 Py_DECREF(args);
980
981 return result;
982}
983
984
985static PyObject*
986element_itertext(ElementObject* self, PyObject* args)
987{
988 PyObject* result;
989
990 if (!PyArg_ParseTuple(args, ":itertext"))
991 return NULL;
992
993 if (!elementtree_itertext_obj) {
994 PyErr_SetString(
995 PyExc_RuntimeError,
996 "itertext helper not found"
997 );
998 return NULL;
999 }
1000
1001 args = PyTuple_New(1);
1002 if (!args)
1003 return NULL;
1004
1005 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1006
1007 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008
1009 Py_DECREF(args);
1010
1011 return result;
1012}
1013
1014static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001015element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001016{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001017 ElementObject* self = (ElementObject*) self_;
1018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001019 if (!self->extra || index < 0 || index >= self->extra->length) {
1020 PyErr_SetString(
1021 PyExc_IndexError,
1022 "child index out of range"
1023 );
1024 return NULL;
1025 }
1026
1027 Py_INCREF(self->extra->children[index]);
1028 return self->extra->children[index];
1029}
1030
1031static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001032element_insert(ElementObject* self, PyObject* args)
1033{
1034 int i;
1035
1036 int index;
1037 PyObject* element;
1038 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1039 &Element_Type, &element))
1040 return NULL;
1041
1042 if (!self->extra)
1043 element_new_extra(self, NULL);
1044
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001045 if (index < 0) {
1046 index += self->extra->length;
1047 if (index < 0)
1048 index = 0;
1049 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001050 if (index > self->extra->length)
1051 index = self->extra->length;
1052
1053 if (element_resize(self, 1) < 0)
1054 return NULL;
1055
1056 for (i = self->extra->length; i > index; i--)
1057 self->extra->children[i] = self->extra->children[i-1];
1058
1059 Py_INCREF(element);
1060 self->extra->children[index] = element;
1061
1062 self->extra->length++;
1063
1064 Py_RETURN_NONE;
1065}
1066
1067static PyObject*
1068element_items(ElementObject* self, PyObject* args)
1069{
1070 if (!PyArg_ParseTuple(args, ":items"))
1071 return NULL;
1072
1073 if (!self->extra || self->extra->attrib == Py_None)
1074 return PyList_New(0);
1075
1076 return PyDict_Items(self->extra->attrib);
1077}
1078
1079static PyObject*
1080element_keys(ElementObject* self, PyObject* args)
1081{
1082 if (!PyArg_ParseTuple(args, ":keys"))
1083 return NULL;
1084
1085 if (!self->extra || self->extra->attrib == Py_None)
1086 return PyList_New(0);
1087
1088 return PyDict_Keys(self->extra->attrib);
1089}
1090
Martin v. Löwis18e16552006-02-15 17:27:45 +00001091static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092element_length(ElementObject* self)
1093{
1094 if (!self->extra)
1095 return 0;
1096
1097 return self->extra->length;
1098}
1099
1100static PyObject*
1101element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1102{
1103 PyObject* elem;
1104
1105 PyObject* tag;
1106 PyObject* attrib;
1107 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1108 return NULL;
1109
1110 attrib = PyDict_Copy(attrib);
1111 if (!attrib)
1112 return NULL;
1113
1114 elem = element_new(tag, attrib);
1115
1116 Py_DECREF(attrib);
1117
1118 return elem;
1119}
1120
1121static PyObject*
1122element_reduce(ElementObject* self, PyObject* args)
1123{
1124 if (!PyArg_ParseTuple(args, ":__reduce__"))
1125 return NULL;
1126
1127 /* Hack alert: This method is used to work around a __copy__
1128 problem on certain 2.3 and 2.4 versions. To save time and
1129 simplify the code, we create the copy in here, and use a dummy
1130 copyelement helper to trick the copy module into doing the
1131 right thing. */
1132
1133 if (!elementtree_copyelement_obj) {
1134 PyErr_SetString(
1135 PyExc_RuntimeError,
1136 "copyelement helper not found"
1137 );
1138 return NULL;
1139 }
1140
1141 return Py_BuildValue(
1142 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1143 );
1144}
1145
1146static PyObject*
1147element_remove(ElementObject* self, PyObject* args)
1148{
1149 int i;
1150
1151 PyObject* element;
1152 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1153 return NULL;
1154
1155 if (!self->extra) {
1156 /* element has no children, so raise exception */
1157 PyErr_SetString(
1158 PyExc_ValueError,
1159 "list.remove(x): x not in list"
1160 );
1161 return NULL;
1162 }
1163
1164 for (i = 0; i < self->extra->length; i++) {
1165 if (self->extra->children[i] == element)
1166 break;
1167 if (PyObject_Compare(self->extra->children[i], element) == 0)
1168 break;
1169 }
1170
1171 if (i == self->extra->length) {
1172 /* element is not in children, so raise exception */
1173 PyErr_SetString(
1174 PyExc_ValueError,
1175 "list.remove(x): x not in list"
1176 );
1177 return NULL;
1178 }
1179
1180 Py_DECREF(self->extra->children[i]);
1181
1182 self->extra->length--;
1183
1184 for (; i < self->extra->length; i++)
1185 self->extra->children[i] = self->extra->children[i+1];
1186
1187 Py_RETURN_NONE;
1188}
1189
1190static PyObject*
1191element_repr(ElementObject* self)
1192{
1193 PyObject* repr;
1194 char buffer[100];
1195
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001196 repr = PyString_FromString("<Element ");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001198 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199
1200 sprintf(buffer, " at %p>", self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001201 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202
1203 return repr;
1204}
1205
1206static PyObject*
1207element_set(ElementObject* self, PyObject* args)
1208{
1209 PyObject* attrib;
1210
1211 PyObject* key;
1212 PyObject* value;
1213 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1214 return NULL;
1215
1216 if (!self->extra)
1217 element_new_extra(self, NULL);
1218
1219 attrib = element_get_attrib(self);
1220 if (!attrib)
1221 return NULL;
1222
1223 if (PyDict_SetItem(attrib, key, value) < 0)
1224 return NULL;
1225
1226 Py_RETURN_NONE;
1227}
1228
1229static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001230element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001232 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 int i;
1234 PyObject* old;
1235
1236 if (!self->extra || index < 0 || index >= self->extra->length) {
1237 PyErr_SetString(
1238 PyExc_IndexError,
1239 "child assignment index out of range");
1240 return -1;
1241 }
1242
1243 old = self->extra->children[index];
1244
1245 if (item) {
1246 Py_INCREF(item);
1247 self->extra->children[index] = item;
1248 } else {
1249 self->extra->length--;
1250 for (i = index; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1252 }
1253
1254 Py_DECREF(old);
1255
1256 return 0;
1257}
1258
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001259static PyObject*
1260element_subscr(PyObject* self_, PyObject* item)
1261{
1262 ElementObject* self = (ElementObject*) self_;
1263
1264#if (PY_VERSION_HEX < 0x02050000)
1265 if (PyInt_Check(item) || PyLong_Check(item)) {
1266 long i = PyInt_AsLong(item);
1267#else
1268 if (PyIndex_Check(item)) {
1269 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1270#endif
1271
1272 if (i == -1 && PyErr_Occurred()) {
1273 return NULL;
1274 }
1275 if (i < 0 && self->extra)
1276 i += self->extra->length;
1277 return element_getitem(self_, i);
1278 }
1279 else if (PySlice_Check(item)) {
1280 Py_ssize_t start, stop, step, slicelen, cur, i;
1281 PyObject* list;
1282
1283 if (!self->extra)
1284 return PyList_New(0);
1285
1286 if (PySlice_GetIndicesEx((PySliceObject *)item,
1287 self->extra->length,
1288 &start, &stop, &step, &slicelen) < 0) {
1289 return NULL;
1290 }
1291
1292 if (slicelen <= 0)
1293 return PyList_New(0);
1294 else {
1295 list = PyList_New(slicelen);
1296 if (!list)
1297 return NULL;
1298
1299 for (cur = start, i = 0; i < slicelen;
1300 cur += step, i++) {
1301 PyObject* item = self->extra->children[cur];
1302 Py_INCREF(item);
1303 PyList_SET_ITEM(list, i, item);
1304 }
1305
1306 return list;
1307 }
1308 }
1309 else {
1310 PyErr_SetString(PyExc_TypeError,
1311 "element indices must be integers");
1312 return NULL;
1313 }
1314}
1315
1316static int
1317element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1318{
1319 ElementObject* self = (ElementObject*) self_;
1320
1321#if (PY_VERSION_HEX < 0x02050000)
1322 if (PyInt_Check(item) || PyLong_Check(item)) {
1323 long i = PyInt_AsLong(item);
1324#else
1325 if (PyIndex_Check(item)) {
1326 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1327#endif
1328
1329 if (i == -1 && PyErr_Occurred()) {
1330 return -1;
1331 }
1332 if (i < 0 && self->extra)
1333 i += self->extra->length;
1334 return element_setitem(self_, i, value);
1335 }
1336 else if (PySlice_Check(item)) {
1337 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1338
1339 PyObject* recycle = NULL;
1340 PyObject* seq = NULL;
1341
1342 if (!self->extra)
1343 element_new_extra(self, NULL);
1344
1345 if (PySlice_GetIndicesEx((PySliceObject *)item,
1346 self->extra->length,
1347 &start, &stop, &step, &slicelen) < 0) {
1348 return -1;
1349 }
1350
1351 if (value == NULL)
1352 newlen = 0;
1353 else {
1354 seq = PySequence_Fast(value, "");
1355 if (!seq) {
1356 PyErr_Format(
1357 PyExc_TypeError,
1358 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1359 );
1360 return -1;
1361 }
1362 newlen = PySequence_Size(seq);
1363 }
1364
1365 if (step != 1 && newlen != slicelen)
1366 {
1367 PyErr_Format(PyExc_ValueError,
1368#if (PY_VERSION_HEX < 0x02050000)
1369 "attempt to assign sequence of size %d "
1370 "to extended slice of size %d",
1371#else
1372 "attempt to assign sequence of size %zd "
1373 "to extended slice of size %zd",
1374#endif
1375 newlen, slicelen
1376 );
1377 return -1;
1378 }
1379
1380
1381 /* Resize before creating the recycle bin, to prevent refleaks. */
1382 if (newlen > slicelen) {
1383 if (element_resize(self, newlen - slicelen) < 0) {
1384 if (seq) {
1385 Py_DECREF(seq);
1386 }
1387 return -1;
1388 }
1389 }
1390
1391 if (slicelen > 0) {
1392 /* to avoid recursive calls to this method (via decref), move
1393 old items to the recycle bin here, and get rid of them when
1394 we're done modifying the element */
1395 recycle = PyList_New(slicelen);
1396 if (!recycle) {
1397 if (seq) {
1398 Py_DECREF(seq);
1399 }
1400 return -1;
1401 }
1402 for (cur = start, i = 0; i < slicelen;
1403 cur += step, i++)
1404 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1405 }
1406
1407 if (newlen < slicelen) {
1408 /* delete slice */
1409 for (i = stop; i < self->extra->length; i++)
1410 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1411 } else if (newlen > slicelen) {
1412 /* insert slice */
1413 for (i = self->extra->length-1; i >= stop; i--)
1414 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1415 }
1416
1417 /* replace the slice */
1418 for (cur = start, i = 0; i < newlen;
1419 cur += step, i++) {
1420 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1421 Py_INCREF(element);
1422 self->extra->children[cur] = element;
1423 }
1424
1425 self->extra->length += newlen - slicelen;
1426
1427 if (seq) {
1428 Py_DECREF(seq);
1429 }
1430
1431 /* discard the recycle bin, and everything in it */
1432 Py_XDECREF(recycle);
1433
1434 return 0;
1435 }
1436 else {
1437 PyErr_SetString(PyExc_TypeError,
1438 "element indices must be integers");
1439 return -1;
1440 }
1441}
1442
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001443static PyMethodDef element_methods[] = {
1444
1445 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1446
1447 {"get", (PyCFunction) element_get, METH_VARARGS},
1448 {"set", (PyCFunction) element_set, METH_VARARGS},
1449
1450 {"find", (PyCFunction) element_find, METH_VARARGS},
1451 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1452 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1453
1454 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001455 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1457 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1458
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001459 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1460 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1461 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1462
1463 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1465
1466 {"items", (PyCFunction) element_items, METH_VARARGS},
1467 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1468
1469 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1470
1471 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1472 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1473
1474 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1475 C objects correctly, so we have to fake it using a __reduce__-
1476 based hack (see the element_reduce implementation above for
1477 details). */
1478
1479 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1480 using a runtime test to figure out if we need to fake things
1481 or now (see the init code below). The following entry is
1482 enabled only if the hack is needed. */
1483
1484 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1485
1486 {NULL, NULL}
1487};
1488
1489static PyObject*
1490element_getattr(ElementObject* self, char* name)
1491{
1492 PyObject* res;
1493
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001494 /* handle common attributes first */
1495 if (strcmp(name, "tag") == 0) {
1496 res = self->tag;
1497 Py_INCREF(res);
1498 return res;
1499 } else if (strcmp(name, "text") == 0) {
1500 res = element_get_text(self);
1501 Py_INCREF(res);
1502 return res;
1503 }
1504
1505 /* methods */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001506 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1507 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001508 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509
1510 PyErr_Clear();
1511
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001512 /* less common attributes */
1513 if (strcmp(name, "tail") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 res = element_get_tail(self);
1515 } else if (strcmp(name, "attrib") == 0) {
1516 if (!self->extra)
1517 element_new_extra(self, NULL);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001518 res = element_get_attrib(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 } else {
1520 PyErr_SetString(PyExc_AttributeError, name);
1521 return NULL;
1522 }
1523
1524 if (!res)
1525 return NULL;
1526
1527 Py_INCREF(res);
1528 return res;
1529}
1530
1531static int
1532element_setattr(ElementObject* self, const char* name, PyObject* value)
1533{
1534 if (value == NULL) {
1535 PyErr_SetString(
1536 PyExc_AttributeError,
1537 "can't delete element attributes"
1538 );
1539 return -1;
1540 }
1541
1542 if (strcmp(name, "tag") == 0) {
1543 Py_DECREF(self->tag);
1544 self->tag = value;
1545 Py_INCREF(self->tag);
1546 } else if (strcmp(name, "text") == 0) {
1547 Py_DECREF(JOIN_OBJ(self->text));
1548 self->text = value;
1549 Py_INCREF(self->text);
1550 } else if (strcmp(name, "tail") == 0) {
1551 Py_DECREF(JOIN_OBJ(self->tail));
1552 self->tail = value;
1553 Py_INCREF(self->tail);
1554 } else if (strcmp(name, "attrib") == 0) {
1555 if (!self->extra)
1556 element_new_extra(self, NULL);
1557 Py_DECREF(self->extra->attrib);
1558 self->extra->attrib = value;
1559 Py_INCREF(self->extra->attrib);
1560 } else {
1561 PyErr_SetString(PyExc_AttributeError, name);
1562 return -1;
1563 }
1564
1565 return 0;
1566}
1567
1568static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001569 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570 0, /* sq_concat */
1571 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001572 element_getitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001573 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001574 element_setitem,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001575 0,
1576};
1577
1578static PyMappingMethods element_as_mapping = {
1579 (lenfunc) element_length,
1580 (binaryfunc) element_subscr,
1581 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582};
1583
1584statichere PyTypeObject Element_Type = {
1585 PyObject_HEAD_INIT(NULL)
1586 0, "Element", sizeof(ElementObject), 0,
1587 /* methods */
1588 (destructor)element_dealloc, /* tp_dealloc */
1589 0, /* tp_print */
1590 (getattrfunc)element_getattr, /* tp_getattr */
1591 (setattrfunc)element_setattr, /* tp_setattr */
1592 0, /* tp_compare */
1593 (reprfunc)element_repr, /* tp_repr */
1594 0, /* tp_as_number */
1595 &element_as_sequence, /* tp_as_sequence */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001596 &element_as_mapping, /* tp_as_mapping */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597};
1598
1599/* ==================================================================== */
1600/* the tree builder type */
1601
1602typedef struct {
1603 PyObject_HEAD
1604
1605 PyObject* root; /* root node (first created node) */
1606
1607 ElementObject* this; /* current node */
1608 ElementObject* last; /* most recently created node */
1609
1610 PyObject* data; /* data collector (string or list), or NULL */
1611
1612 PyObject* stack; /* element stack */
Neal Norwitzc7074382006-06-12 02:06:17 +00001613 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614
1615 /* element tracing */
1616 PyObject* events; /* list of events, or NULL if not collecting */
1617 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1618 PyObject* end_event_obj;
1619 PyObject* start_ns_event_obj;
1620 PyObject* end_ns_event_obj;
1621
1622} TreeBuilderObject;
1623
1624staticforward PyTypeObject TreeBuilder_Type;
1625
Christian Heimese93237d2007-12-19 02:37:44 +00001626#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627
1628/* -------------------------------------------------------------------- */
1629/* constructor and destructor */
1630
1631LOCAL(PyObject*)
1632treebuilder_new(void)
1633{
1634 TreeBuilderObject* self;
1635
1636 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1637 if (self == NULL)
1638 return NULL;
1639
1640 self->root = NULL;
1641
1642 Py_INCREF(Py_None);
1643 self->this = (ElementObject*) Py_None;
1644
1645 Py_INCREF(Py_None);
1646 self->last = (ElementObject*) Py_None;
1647
1648 self->data = NULL;
1649
1650 self->stack = PyList_New(20);
1651 self->index = 0;
1652
1653 self->events = NULL;
1654 self->start_event_obj = self->end_event_obj = NULL;
1655 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1656
1657 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1658
1659 return (PyObject*) self;
1660}
1661
1662static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00001663treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664{
1665 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1666 return NULL;
1667
1668 return treebuilder_new();
1669}
1670
1671static void
1672treebuilder_dealloc(TreeBuilderObject* self)
1673{
1674 Py_XDECREF(self->end_ns_event_obj);
1675 Py_XDECREF(self->start_ns_event_obj);
1676 Py_XDECREF(self->end_event_obj);
1677 Py_XDECREF(self->start_event_obj);
1678 Py_XDECREF(self->events);
1679 Py_DECREF(self->stack);
1680 Py_XDECREF(self->data);
1681 Py_DECREF(self->last);
1682 Py_DECREF(self->this);
1683 Py_XDECREF(self->root);
1684
1685 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1686
1687 PyObject_Del(self);
1688}
1689
1690/* -------------------------------------------------------------------- */
1691/* handlers */
1692
1693LOCAL(PyObject*)
1694treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1695 PyObject* standalone)
1696{
1697 Py_RETURN_NONE;
1698}
1699
1700LOCAL(PyObject*)
1701treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1702 PyObject* attrib)
1703{
1704 PyObject* node;
1705 PyObject* this;
1706
1707 if (self->data) {
1708 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001709 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710 self->last->text = JOIN_SET(
1711 self->data, PyList_CheckExact(self->data)
1712 );
1713 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001714 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001715 self->last->tail = JOIN_SET(
1716 self->data, PyList_CheckExact(self->data)
1717 );
1718 }
1719 self->data = NULL;
1720 }
1721
1722 node = element_new(tag, attrib);
1723 if (!node)
1724 return NULL;
1725
1726 this = (PyObject*) self->this;
1727
1728 if (this != Py_None) {
1729 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001730 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731 } else {
1732 if (self->root) {
1733 PyErr_SetString(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001734 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001735 "multiple elements on top level"
1736 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001737 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738 }
1739 Py_INCREF(node);
1740 self->root = node;
1741 }
1742
1743 if (self->index < PyList_GET_SIZE(self->stack)) {
1744 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001745 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001746 Py_INCREF(this);
1747 } else {
1748 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001749 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750 }
1751 self->index++;
1752
1753 Py_DECREF(this);
1754 Py_INCREF(node);
1755 self->this = (ElementObject*) node;
1756
1757 Py_DECREF(self->last);
1758 Py_INCREF(node);
1759 self->last = (ElementObject*) node;
1760
1761 if (self->start_event_obj) {
1762 PyObject* res;
1763 PyObject* action = self->start_event_obj;
1764 res = PyTuple_New(2);
1765 if (res) {
1766 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1767 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1768 PyList_Append(self->events, res);
1769 Py_DECREF(res);
1770 } else
1771 PyErr_Clear(); /* FIXME: propagate error */
1772 }
1773
1774 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001775
1776 error:
1777 Py_DECREF(node);
1778 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001779}
1780
1781LOCAL(PyObject*)
1782treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1783{
1784 if (!self->data) {
Fredrik Lundhdc075b92006-08-16 16:47:07 +00001785 if (self->last == (ElementObject*) Py_None) {
1786 /* ignore calls to data before the first call to start */
1787 Py_RETURN_NONE;
1788 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 /* store the first item as is */
1790 Py_INCREF(data); self->data = data;
1791 } else {
1792 /* more than one item; use a list to collect items */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001793 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1794 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795 /* expat often generates single character data sections; handle
1796 the most common case by resizing the existing string... */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001797 Py_ssize_t size = PyString_GET_SIZE(self->data);
1798 if (_PyString_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001799 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001800 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001801 } else if (PyList_CheckExact(self->data)) {
1802 if (PyList_Append(self->data, data) < 0)
1803 return NULL;
1804 } else {
1805 PyObject* list = PyList_New(2);
1806 if (!list)
1807 return NULL;
1808 PyList_SET_ITEM(list, 0, self->data);
1809 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1810 self->data = list;
1811 }
1812 }
1813
1814 Py_RETURN_NONE;
1815}
1816
1817LOCAL(PyObject*)
1818treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1819{
1820 PyObject* item;
1821
1822 if (self->data) {
1823 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001824 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001825 self->last->text = JOIN_SET(
1826 self->data, PyList_CheckExact(self->data)
1827 );
1828 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001829 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001830 self->last->tail = JOIN_SET(
1831 self->data, PyList_CheckExact(self->data)
1832 );
1833 }
1834 self->data = NULL;
1835 }
1836
1837 if (self->index == 0) {
1838 PyErr_SetString(
1839 PyExc_IndexError,
1840 "pop from empty stack"
1841 );
1842 return NULL;
1843 }
1844
1845 self->index--;
1846
1847 item = PyList_GET_ITEM(self->stack, self->index);
1848 Py_INCREF(item);
1849
1850 Py_DECREF(self->last);
1851
1852 self->last = (ElementObject*) self->this;
1853 self->this = (ElementObject*) item;
1854
1855 if (self->end_event_obj) {
1856 PyObject* res;
1857 PyObject* action = self->end_event_obj;
1858 PyObject* node = (PyObject*) self->last;
1859 res = PyTuple_New(2);
1860 if (res) {
1861 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1862 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1863 PyList_Append(self->events, res);
1864 Py_DECREF(res);
1865 } else
1866 PyErr_Clear(); /* FIXME: propagate error */
1867 }
1868
1869 Py_INCREF(self->last);
1870 return (PyObject*) self->last;
1871}
1872
1873LOCAL(void)
1874treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001875 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001876{
1877 PyObject* res;
1878 PyObject* action;
1879 PyObject* parcel;
1880
1881 if (!self->events)
1882 return;
1883
1884 if (start) {
1885 if (!self->start_ns_event_obj)
1886 return;
1887 action = self->start_ns_event_obj;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001888 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001889 if (!parcel)
1890 return;
1891 Py_INCREF(action);
1892 } else {
1893 if (!self->end_ns_event_obj)
1894 return;
1895 action = self->end_ns_event_obj;
1896 Py_INCREF(action);
1897 parcel = Py_None;
1898 Py_INCREF(parcel);
1899 }
1900
1901 res = PyTuple_New(2);
1902
1903 if (res) {
1904 PyTuple_SET_ITEM(res, 0, action);
1905 PyTuple_SET_ITEM(res, 1, parcel);
1906 PyList_Append(self->events, res);
1907 Py_DECREF(res);
1908 } else
1909 PyErr_Clear(); /* FIXME: propagate error */
1910}
1911
1912/* -------------------------------------------------------------------- */
1913/* methods (in alphabetical order) */
1914
1915static PyObject*
1916treebuilder_data(TreeBuilderObject* self, PyObject* args)
1917{
1918 PyObject* data;
1919 if (!PyArg_ParseTuple(args, "O:data", &data))
1920 return NULL;
1921
1922 return treebuilder_handle_data(self, data);
1923}
1924
1925static PyObject*
1926treebuilder_end(TreeBuilderObject* self, PyObject* args)
1927{
1928 PyObject* tag;
1929 if (!PyArg_ParseTuple(args, "O:end", &tag))
1930 return NULL;
1931
1932 return treebuilder_handle_end(self, tag);
1933}
1934
1935LOCAL(PyObject*)
1936treebuilder_done(TreeBuilderObject* self)
1937{
1938 PyObject* res;
1939
1940 /* FIXME: check stack size? */
1941
1942 if (self->root)
1943 res = self->root;
1944 else
1945 res = Py_None;
1946
1947 Py_INCREF(res);
1948 return res;
1949}
1950
1951static PyObject*
1952treebuilder_close(TreeBuilderObject* self, PyObject* args)
1953{
1954 if (!PyArg_ParseTuple(args, ":close"))
1955 return NULL;
1956
1957 return treebuilder_done(self);
1958}
1959
1960static PyObject*
1961treebuilder_start(TreeBuilderObject* self, PyObject* args)
1962{
1963 PyObject* tag;
1964 PyObject* attrib = Py_None;
1965 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1966 return NULL;
1967
1968 return treebuilder_handle_start(self, tag, attrib);
1969}
1970
1971static PyObject*
1972treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1973{
1974 PyObject* encoding;
1975 PyObject* standalone;
1976 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1977 return NULL;
1978
1979 return treebuilder_handle_xml(self, encoding, standalone);
1980}
1981
1982static PyMethodDef treebuilder_methods[] = {
1983 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1984 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1985 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1986 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1987 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1988 {NULL, NULL}
1989};
1990
1991static PyObject*
1992treebuilder_getattr(TreeBuilderObject* self, char* name)
1993{
1994 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1995}
1996
1997statichere PyTypeObject TreeBuilder_Type = {
1998 PyObject_HEAD_INIT(NULL)
1999 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
2000 /* methods */
2001 (destructor)treebuilder_dealloc, /* tp_dealloc */
2002 0, /* tp_print */
2003 (getattrfunc)treebuilder_getattr, /* tp_getattr */
2004};
2005
2006/* ==================================================================== */
2007/* the expat interface */
2008
2009#if defined(USE_EXPAT)
2010
2011#include "expat.h"
2012
2013#if defined(USE_PYEXPAT_CAPI)
2014#include "pyexpat.h"
2015static struct PyExpat_CAPI* expat_capi;
2016#define EXPAT(func) (expat_capi->func)
2017#else
2018#define EXPAT(func) (XML_##func)
2019#endif
2020
2021typedef struct {
2022 PyObject_HEAD
2023
2024 XML_Parser parser;
2025
2026 PyObject* target;
2027 PyObject* entity;
2028
2029 PyObject* names;
2030
2031 PyObject* handle_xml;
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002032
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002033 PyObject* handle_start;
2034 PyObject* handle_data;
2035 PyObject* handle_end;
2036
2037 PyObject* handle_comment;
2038 PyObject* handle_pi;
2039
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002040 PyObject* handle_close;
2041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002042} XMLParserObject;
2043
2044staticforward PyTypeObject XMLParser_Type;
2045
2046/* helpers */
2047
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first `size` bytes of `string` has its high
   bit set (i.e. the 8-bit string is not plain 7-bit ASCII), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    /* check if an 8-bit string contains UTF-8 characters */
    while (size-- > 0) {
        if (*string++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
2062
2063LOCAL(PyObject*)
2064makestring(const char* string, int size)
2065{
2066 /* convert a UTF-8 string to either a 7-bit ascii string or a
2067 Unicode string */
2068
2069#if defined(Py_USING_UNICODE)
2070 if (checkstring(string, size))
2071 return PyUnicode_DecodeUTF8(string, size, "strict");
2072#endif
2073
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002074 return PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002075}
2076
2077LOCAL(PyObject*)
2078makeuniversal(XMLParserObject* self, const char* string)
2079{
2080 /* convert a UTF-8 tag/attribute name from the expat parser
2081 to a universal name string */
2082
2083 int size = strlen(string);
2084 PyObject* key;
2085 PyObject* value;
2086
2087 /* look the 'raw' name up in the names dictionary */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002088 key = PyString_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002089 if (!key)
2090 return NULL;
2091
2092 value = PyDict_GetItem(self->names, key);
2093
2094 if (value) {
2095 Py_INCREF(value);
2096 } else {
2097 /* new name. convert to universal name, and decode as
2098 necessary */
2099
2100 PyObject* tag;
2101 char* p;
2102 int i;
2103
2104 /* look for namespace separator */
2105 for (i = 0; i < size; i++)
2106 if (string[i] == '}')
2107 break;
2108 if (i != size) {
2109 /* convert to universal name */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002110 tag = PyString_FromStringAndSize(NULL, size+1);
2111 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002112 p[0] = '{';
2113 memcpy(p+1, string, size);
2114 size++;
2115 } else {
2116 /* plain name; use key as tag */
2117 Py_INCREF(key);
2118 tag = key;
2119 }
2120
2121 /* decode universal name */
2122#if defined(Py_USING_UNICODE)
2123 /* inline makestring, to avoid duplicating the source string if
2124 it's not an utf-8 string */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002125 p = PyString_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002126 if (checkstring(p, size)) {
2127 value = PyUnicode_DecodeUTF8(p, size, "strict");
2128 Py_DECREF(tag);
2129 if (!value) {
2130 Py_DECREF(key);
2131 return NULL;
2132 }
2133 } else
2134#endif
2135 value = tag; /* use tag as is */
2136
2137 /* add to names dictionary */
2138 if (PyDict_SetItem(self->names, key, value) < 0) {
2139 Py_DECREF(key);
2140 Py_DECREF(value);
2141 return NULL;
2142 }
2143 }
2144
2145 Py_DECREF(key);
2146 return value;
2147}
2148
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002149static void
2150expat_set_error(const char* message, int line, int column)
2151{
2152 PyObject *error;
2153 PyObject *position;
2154 char buffer[256];
2155
2156 sprintf(buffer, "%s: line %d, column %d", message, line, column);
2157
2158 error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
2159 if (!error)
2160 return;
2161
2162 /* add position attribute */
2163 position = Py_BuildValue("(ii)", line, column);
2164 if (!position) {
2165 Py_DECREF(error);
2166 return;
2167 }
2168 if (PyObject_SetAttrString(error, "position", position) == -1) {
2169 Py_DECREF(error);
2170 Py_DECREF(position);
2171 return;
2172 }
2173 Py_DECREF(position);
2174
2175 PyErr_SetObject(elementtree_parseerror_obj, error);
2176 Py_DECREF(error);
2177}
2178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002179/* -------------------------------------------------------------------- */
2180/* handlers */
2181
2182static void
2183expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2184 int data_len)
2185{
2186 PyObject* key;
2187 PyObject* value;
2188 PyObject* res;
2189
2190 if (data_len < 2 || data_in[0] != '&')
2191 return;
2192
2193 key = makestring(data_in + 1, data_len - 2);
2194 if (!key)
2195 return;
2196
2197 value = PyDict_GetItem(self->entity, key);
2198
2199 if (value) {
2200 if (TreeBuilder_CheckExact(self->target))
2201 res = treebuilder_handle_data(
2202 (TreeBuilderObject*) self->target, value
2203 );
2204 else if (self->handle_data)
2205 res = PyObject_CallFunction(self->handle_data, "O", value);
2206 else
2207 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208 Py_XDECREF(res);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002209 } else if (!PyErr_Occurred()) {
2210 /* Report the first error, not the last */
2211 char message[128];
2212 sprintf(message, "undefined entity &%.100s;", PyString_AS_STRING(key));
2213 expat_set_error(
2214 message,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002215 EXPAT(GetErrorLineNumber)(self->parser),
2216 EXPAT(GetErrorColumnNumber)(self->parser)
2217 );
2218 }
2219
2220 Py_DECREF(key);
2221}
2222
2223static void
2224expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2225 const XML_Char **attrib_in)
2226{
2227 PyObject* res;
2228 PyObject* tag;
2229 PyObject* attrib;
2230 int ok;
2231
2232 /* tag name */
2233 tag = makeuniversal(self, tag_in);
2234 if (!tag)
2235 return; /* parser will look for errors */
2236
2237 /* attributes */
2238 if (attrib_in[0]) {
2239 attrib = PyDict_New();
2240 if (!attrib)
2241 return;
2242 while (attrib_in[0] && attrib_in[1]) {
2243 PyObject* key = makeuniversal(self, attrib_in[0]);
2244 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2245 if (!key || !value) {
2246 Py_XDECREF(value);
2247 Py_XDECREF(key);
2248 Py_DECREF(attrib);
2249 return;
2250 }
2251 ok = PyDict_SetItem(attrib, key, value);
2252 Py_DECREF(value);
2253 Py_DECREF(key);
2254 if (ok < 0) {
2255 Py_DECREF(attrib);
2256 return;
2257 }
2258 attrib_in += 2;
2259 }
2260 } else {
2261 Py_INCREF(Py_None);
2262 attrib = Py_None;
2263 }
2264
2265 if (TreeBuilder_CheckExact(self->target))
2266 /* shortcut */
2267 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2268 tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002269 else if (self->handle_start) {
2270 if (attrib == Py_None) {
2271 Py_DECREF(attrib);
2272 attrib = PyDict_New();
2273 if (!attrib)
2274 return;
2275 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002277 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 res = NULL;
2279
2280 Py_DECREF(tag);
2281 Py_DECREF(attrib);
2282
2283 Py_XDECREF(res);
2284}
2285
2286static void
2287expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2288 int data_len)
2289{
2290 PyObject* data;
2291 PyObject* res;
2292
2293 data = makestring(data_in, data_len);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002294 if (!data)
2295 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296
2297 if (TreeBuilder_CheckExact(self->target))
2298 /* shortcut */
2299 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2300 else if (self->handle_data)
2301 res = PyObject_CallFunction(self->handle_data, "O", data);
2302 else
2303 res = NULL;
2304
2305 Py_DECREF(data);
2306
2307 Py_XDECREF(res);
2308}
2309
2310static void
2311expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2312{
2313 PyObject* tag;
2314 PyObject* res = NULL;
2315
2316 if (TreeBuilder_CheckExact(self->target))
2317 /* shortcut */
2318 /* the standard tree builder doesn't look at the end tag */
2319 res = treebuilder_handle_end(
2320 (TreeBuilderObject*) self->target, Py_None
2321 );
2322 else if (self->handle_end) {
2323 tag = makeuniversal(self, tag_in);
2324 if (tag) {
2325 res = PyObject_CallFunction(self->handle_end, "O", tag);
2326 Py_DECREF(tag);
2327 }
2328 }
2329
2330 Py_XDECREF(res);
2331}
2332
2333static void
2334expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2335 const XML_Char *uri)
2336{
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002337 PyObject* sprefix = NULL;
2338 PyObject* suri = NULL;
2339
2340 suri = makestring(uri, strlen(uri));
2341 if (!suri)
2342 return;
2343
2344 if (prefix)
2345 sprefix = makestring(prefix, strlen(prefix));
2346 else
2347 sprefix = PyString_FromStringAndSize("", 0);
2348 if (!sprefix) {
2349 Py_DECREF(suri);
2350 return;
2351 }
2352
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353 treebuilder_handle_namespace(
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002354 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355 );
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002356
2357 Py_DECREF(sprefix);
2358 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002359}
2360
2361static void
2362expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2363{
2364 treebuilder_handle_namespace(
2365 (TreeBuilderObject*) self->target, 0, NULL, NULL
2366 );
2367}
2368
2369static void
2370expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2371{
2372 PyObject* comment;
2373 PyObject* res;
2374
2375 if (self->handle_comment) {
2376 comment = makestring(comment_in, strlen(comment_in));
2377 if (comment) {
2378 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2379 Py_XDECREF(res);
2380 Py_DECREF(comment);
2381 }
2382 }
2383}
2384
2385static void
2386expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2387 const XML_Char* data_in)
2388{
2389 PyObject* target;
2390 PyObject* data;
2391 PyObject* res;
2392
2393 if (self->handle_pi) {
2394 target = makestring(target_in, strlen(target_in));
2395 data = makestring(data_in, strlen(data_in));
2396 if (target && data) {
2397 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2398 Py_XDECREF(res);
2399 Py_DECREF(data);
2400 Py_DECREF(target);
2401 } else {
2402 Py_XDECREF(data);
2403 Py_XDECREF(target);
2404 }
2405 }
2406}
2407
2408#if defined(Py_USING_UNICODE)
2409static int
2410expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2411 XML_Encoding *info)
2412{
2413 PyObject* u;
2414 Py_UNICODE* p;
2415 unsigned char s[256];
2416 int i;
2417
2418 memset(info, 0, sizeof(XML_Encoding));
2419
2420 for (i = 0; i < 256; i++)
2421 s[i] = i;
2422
Fredrik Lundhc3389992005-12-25 11:40:19 +00002423 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 if (!u)
2425 return XML_STATUS_ERROR;
2426
2427 if (PyUnicode_GET_SIZE(u) != 256) {
2428 Py_DECREF(u);
2429 return XML_STATUS_ERROR;
2430 }
2431
2432 p = PyUnicode_AS_UNICODE(u);
2433
2434 for (i = 0; i < 256; i++) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002435 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2436 info->map[i] = p[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 else
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002438 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439 }
2440
2441 Py_DECREF(u);
2442
2443 return XML_STATUS_OK;
2444}
2445#endif
2446
2447/* -------------------------------------------------------------------- */
2448/* constructor and destructor */
2449
2450static PyObject*
Fredrik Lundh81707f12006-06-03 21:56:05 +00002451xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452{
2453 XMLParserObject* self;
2454 /* FIXME: does this need to be static? */
2455 static XML_Memory_Handling_Suite memory_handler;
2456
2457 PyObject* target = NULL;
2458 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002459 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2461 &target, &encoding))
2462 return NULL;
2463
2464#if defined(USE_PYEXPAT_CAPI)
2465 if (!expat_capi) {
2466 PyErr_SetString(
2467 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2468 );
2469 return NULL;
2470 }
2471#endif
2472
2473 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2474 if (self == NULL)
2475 return NULL;
2476
2477 self->entity = PyDict_New();
2478 if (!self->entity) {
2479 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002480 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 }
2482
2483 self->names = PyDict_New();
2484 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002485 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002487 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 }
2489
2490 memory_handler.malloc_fcn = PyObject_Malloc;
2491 memory_handler.realloc_fcn = PyObject_Realloc;
2492 memory_handler.free_fcn = PyObject_Free;
2493
2494 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2495 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002496 PyObject_Del(self->names);
2497 PyObject_Del(self->entity);
2498 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002500 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501 }
2502
2503 /* setup target handlers */
2504 if (!target) {
2505 target = treebuilder_new();
2506 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002507 EXPAT(ParserFree)(self->parser);
2508 PyObject_Del(self->names);
2509 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002511 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 }
2513 } else
2514 Py_INCREF(target);
2515 self->target = target;
2516
2517 self->handle_xml = PyObject_GetAttrString(target, "xml");
2518 self->handle_start = PyObject_GetAttrString(target, "start");
2519 self->handle_data = PyObject_GetAttrString(target, "data");
2520 self->handle_end = PyObject_GetAttrString(target, "end");
2521 self->handle_comment = PyObject_GetAttrString(target, "comment");
2522 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002523 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524
2525 PyErr_Clear();
2526
2527 /* configure parser */
2528 EXPAT(SetUserData)(self->parser, self);
2529 EXPAT(SetElementHandler)(
2530 self->parser,
2531 (XML_StartElementHandler) expat_start_handler,
2532 (XML_EndElementHandler) expat_end_handler
2533 );
2534 EXPAT(SetDefaultHandlerExpand)(
2535 self->parser,
2536 (XML_DefaultHandler) expat_default_handler
2537 );
2538 EXPAT(SetCharacterDataHandler)(
2539 self->parser,
2540 (XML_CharacterDataHandler) expat_data_handler
2541 );
2542 if (self->handle_comment)
2543 EXPAT(SetCommentHandler)(
2544 self->parser,
2545 (XML_CommentHandler) expat_comment_handler
2546 );
2547 if (self->handle_pi)
2548 EXPAT(SetProcessingInstructionHandler)(
2549 self->parser,
2550 (XML_ProcessingInstructionHandler) expat_pi_handler
2551 );
2552#if defined(Py_USING_UNICODE)
2553 EXPAT(SetUnknownEncodingHandler)(
2554 self->parser,
2555 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2556 );
2557#endif
2558
2559 ALLOC(sizeof(XMLParserObject), "create expatparser");
2560
2561 return (PyObject*) self;
2562}
2563
2564static void
2565xmlparser_dealloc(XMLParserObject* self)
2566{
2567 EXPAT(ParserFree)(self->parser);
2568
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002569 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 Py_XDECREF(self->handle_pi);
2571 Py_XDECREF(self->handle_comment);
2572 Py_XDECREF(self->handle_end);
2573 Py_XDECREF(self->handle_data);
2574 Py_XDECREF(self->handle_start);
2575 Py_XDECREF(self->handle_xml);
2576
2577 Py_DECREF(self->target);
2578 Py_DECREF(self->entity);
2579 Py_DECREF(self->names);
2580
2581 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2582
2583 PyObject_Del(self);
2584}
2585
2586/* -------------------------------------------------------------------- */
2587/* methods (in alphabetical order) */
2588
2589LOCAL(PyObject*)
2590expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2591{
2592 int ok;
2593
2594 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2595
2596 if (PyErr_Occurred())
2597 return NULL;
2598
2599 if (!ok) {
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002600 expat_set_error(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2602 EXPAT(GetErrorLineNumber)(self->parser),
2603 EXPAT(GetErrorColumnNumber)(self->parser)
2604 );
2605 return NULL;
2606 }
2607
2608 Py_RETURN_NONE;
2609}
2610
2611static PyObject*
2612xmlparser_close(XMLParserObject* self, PyObject* args)
2613{
2614 /* end feeding data to parser */
2615
2616 PyObject* res;
2617 if (!PyArg_ParseTuple(args, ":close"))
2618 return NULL;
2619
2620 res = expat_parse(self, "", 0, 1);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002621 if (!res)
2622 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002624 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625 Py_DECREF(res);
2626 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002627 } if (self->handle_close) {
2628 Py_DECREF(res);
2629 return PyObject_CallFunction(self->handle_close, "");
2630 } else
2631 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632}
2633
2634static PyObject*
2635xmlparser_feed(XMLParserObject* self, PyObject* args)
2636{
2637 /* feed data to parser */
2638
2639 char* data;
2640 int data_len;
2641 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2642 return NULL;
2643
2644 return expat_parse(self, data, data_len, 0);
2645}
2646
2647static PyObject*
2648xmlparser_parse(XMLParserObject* self, PyObject* args)
2649{
2650 /* (internal) parse until end of input stream */
2651
2652 PyObject* reader;
2653 PyObject* buffer;
2654 PyObject* res;
2655
2656 PyObject* fileobj;
2657 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2658 return NULL;
2659
2660 reader = PyObject_GetAttrString(fileobj, "read");
2661 if (!reader)
2662 return NULL;
2663
2664 /* read from open file object */
2665 for (;;) {
2666
2667 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2668
2669 if (!buffer) {
2670 /* read failed (e.g. due to KeyboardInterrupt) */
2671 Py_DECREF(reader);
2672 return NULL;
2673 }
2674
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002675 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676 Py_DECREF(buffer);
2677 break;
2678 }
2679
2680 res = expat_parse(
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002681 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682 );
2683
2684 Py_DECREF(buffer);
2685
2686 if (!res) {
2687 Py_DECREF(reader);
2688 return NULL;
2689 }
2690 Py_DECREF(res);
2691
2692 }
2693
2694 Py_DECREF(reader);
2695
2696 res = expat_parse(self, "", 0, 1);
2697
2698 if (res && TreeBuilder_CheckExact(self->target)) {
2699 Py_DECREF(res);
2700 return treebuilder_done((TreeBuilderObject*) self->target);
2701 }
2702
2703 return res;
2704}
2705
2706static PyObject*
2707xmlparser_setevents(XMLParserObject* self, PyObject* args)
2708{
2709 /* activate element event reporting */
2710
Neal Norwitzc7074382006-06-12 02:06:17 +00002711 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712 TreeBuilderObject* target;
2713
2714 PyObject* events; /* event collector */
2715 PyObject* event_set = Py_None;
2716 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2717 &event_set))
2718 return NULL;
2719
2720 if (!TreeBuilder_CheckExact(self->target)) {
2721 PyErr_SetString(
2722 PyExc_TypeError,
2723 "event handling only supported for cElementTree.Treebuilder "
2724 "targets"
2725 );
2726 return NULL;
2727 }
2728
2729 target = (TreeBuilderObject*) self->target;
2730
2731 Py_INCREF(events);
2732 Py_XDECREF(target->events);
2733 target->events = events;
2734
2735 /* clear out existing events */
2736 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2737 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2738 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2739 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2740
2741 if (event_set == Py_None) {
2742 /* default is "end" only */
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002743 target->end_event_obj = PyString_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 Py_RETURN_NONE;
2745 }
2746
2747 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2748 goto error;
2749
2750 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2751 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2752 char* event;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002753 if (!PyString_Check(item))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754 goto error;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002755 event = PyString_AS_STRING(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 if (strcmp(event, "start") == 0) {
2757 Py_INCREF(item);
2758 target->start_event_obj = item;
2759 } else if (strcmp(event, "end") == 0) {
2760 Py_INCREF(item);
2761 Py_XDECREF(target->end_event_obj);
2762 target->end_event_obj = item;
2763 } else if (strcmp(event, "start-ns") == 0) {
2764 Py_INCREF(item);
2765 Py_XDECREF(target->start_ns_event_obj);
2766 target->start_ns_event_obj = item;
2767 EXPAT(SetNamespaceDeclHandler)(
2768 self->parser,
2769 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2770 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2771 );
2772 } else if (strcmp(event, "end-ns") == 0) {
2773 Py_INCREF(item);
2774 Py_XDECREF(target->end_ns_event_obj);
2775 target->end_ns_event_obj = item;
2776 EXPAT(SetNamespaceDeclHandler)(
2777 self->parser,
2778 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2779 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2780 );
2781 } else {
2782 PyErr_Format(
2783 PyExc_ValueError,
2784 "unknown event '%s'", event
2785 );
2786 return NULL;
2787 }
2788 }
2789
2790 Py_RETURN_NONE;
2791
2792 error:
2793 PyErr_SetString(
2794 PyExc_TypeError,
2795 "invalid event tuple"
2796 );
2797 return NULL;
2798}
2799
2800static PyMethodDef xmlparser_methods[] = {
2801 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2802 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2803 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2804 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2805 {NULL, NULL}
2806};
2807
2808static PyObject*
2809xmlparser_getattr(XMLParserObject* self, char* name)
2810{
2811 PyObject* res;
2812
2813 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2814 if (res)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002815 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816
2817 PyErr_Clear();
2818
2819 if (strcmp(name, "entity") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002820 res = self->entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821 else if (strcmp(name, "target") == 0)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002822 res = self->target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 else if (strcmp(name, "version") == 0) {
2824 char buffer[100];
2825 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2826 XML_MINOR_VERSION, XML_MICRO_VERSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002827 return PyString_FromString(buffer);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 } else {
2829 PyErr_SetString(PyExc_AttributeError, name);
2830 return NULL;
2831 }
2832
2833 Py_INCREF(res);
2834 return res;
2835}
2836
2837statichere PyTypeObject XMLParser_Type = {
2838 PyObject_HEAD_INIT(NULL)
2839 0, "XMLParser", sizeof(XMLParserObject), 0,
2840 /* methods */
2841 (destructor)xmlparser_dealloc, /* tp_dealloc */
2842 0, /* tp_print */
2843 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2844};
2845
2846#endif
2847
2848/* ==================================================================== */
2849/* python module interface */
2850
2851static PyMethodDef _functions[] = {
2852 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2853 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2854 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2855#if defined(USE_EXPAT)
2856 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2857 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2858#endif
2859 {NULL, NULL}
2860};
2861
2862DL_EXPORT(void)
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002863init_elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864{
2865 PyObject* m;
2866 PyObject* g;
2867 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002868
2869 /* Patch object type */
Christian Heimese93237d2007-12-19 02:37:44 +00002870 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871#if defined(USE_EXPAT)
Christian Heimese93237d2007-12-19 02:37:44 +00002872 Py_TYPE(&XMLParser_Type) = &PyType_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873#endif
2874
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002875 m = Py_InitModule("_elementtree", _functions);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002876 if (!m)
2877 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002878
2879 /* python glue code */
2880
2881 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002882 if (!g)
2883 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884
2885 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2886
2887 bootstrap = (
2888
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002889 "from copy import copy, deepcopy\n"
2890
2891 "try:\n"
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002892 " from xml.etree import ElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893 "except ImportError:\n"
2894 " import ElementTree\n"
2895 "ET = ElementTree\n"
2896 "del ElementTree\n"
2897
Fredrik Lundh6d52b552005-12-16 22:06:43 +00002898 "import _elementtree as cElementTree\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899
2900 "try:\n" /* check if copy works as is */
2901 " copy(cElementTree.Element('x'))\n"
2902 "except:\n"
2903 " def copyelement(elem):\n"
2904 " return elem\n"
2905
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002906 "class CommentProxy:\n"
2907 " def __call__(self, text=None):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 " element = cElementTree.Element(ET.Comment)\n"
2909 " element.text = text\n"
2910 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002911 " def __cmp__(self, other):\n"
2912 " return cmp(ET.Comment, other)\n"
2913 "cElementTree.Comment = CommentProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914
2915 "class ElementTree(ET.ElementTree):\n" /* public */
2916 " def parse(self, source, parser=None):\n"
2917 " if not hasattr(source, 'read'):\n"
2918 " source = open(source, 'rb')\n"
2919 " if parser is not None:\n"
2920 " while 1:\n"
2921 " data = source.read(65536)\n"
2922 " if not data:\n"
2923 " break\n"
2924 " parser.feed(data)\n"
2925 " self._root = parser.close()\n"
2926 " else:\n"
2927 " parser = cElementTree.XMLParser()\n"
2928 " self._root = parser._parse(source)\n"
2929 " return self._root\n"
2930 "cElementTree.ElementTree = ElementTree\n"
2931
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002932 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 " if tag == '*':\n"
2934 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 " if tag is None or node.tag == tag:\n"
2936 " yield node\n"
2937 " for node in node:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002938 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 " yield node\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002940
2941 "def itertext(node):\n" /* helper */
2942 " if node.text:\n"
2943 " yield node.text\n"
2944 " for e in node:\n"
2945 " for s in e.itertext():\n"
2946 " yield s\n"
2947 " if e.tail:\n"
2948 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949
2950 "def parse(source, parser=None):\n" /* public */
2951 " tree = ElementTree()\n"
2952 " tree.parse(source, parser)\n"
2953 " return tree\n"
2954 "cElementTree.parse = parse\n"
2955
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 "class iterparse(object):\n"
2957 " root = None\n"
2958 " def __init__(self, file, events=None):\n"
2959 " if not hasattr(file, 'read'):\n"
2960 " file = open(file, 'rb')\n"
2961 " self._file = file\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002962 " self._events = []\n"
2963 " self._index = 0\n"
2964 " self.root = self._root = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 " b = cElementTree.TreeBuilder()\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002966 " self._parser = cElementTree.XMLParser(b)\n"
2967 " self._parser._setevents(self._events, events)\n"
2968 " def next(self):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 " while 1:\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002970 " try:\n"
2971 " item = self._events[self._index]\n"
2972 " except IndexError:\n"
2973 " if self._parser is None:\n"
2974 " self.root = self._root\n"
2975 " raise StopIteration\n"
2976 " # load event buffer\n"
2977 " del self._events[:]\n"
2978 " self._index = 0\n"
2979 " data = self._file.read(16384)\n"
2980 " if data:\n"
2981 " self._parser.feed(data)\n"
2982 " else:\n"
2983 " self._root = self._parser.close()\n"
2984 " self._parser = None\n"
2985 " else:\n"
2986 " self._index = self._index + 1\n"
2987 " return item\n"
2988 " def __iter__(self):\n"
2989 " return self\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 "cElementTree.iterparse = iterparse\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002992 "class PIProxy:\n"
2993 " def __call__(self, target, text=None):\n"
2994 " element = cElementTree.Element(ET.PI)\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 " element.text = target\n"
2996 " if text:\n"
2997 " element.text = element.text + ' ' + text\n"
2998 " return element\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002999 " def __cmp__(self, other):\n"
3000 " return cmp(ET.PI, other)\n"
3001 "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002
3003 "def XML(text):\n" /* public */
3004 " parser = cElementTree.XMLParser()\n"
3005 " parser.feed(text)\n"
3006 " return parser.close()\n"
3007 "cElementTree.XML = cElementTree.fromstring = XML\n"
3008
3009 "def XMLID(text):\n" /* public */
3010 " tree = XML(text)\n"
3011 " ids = {}\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003012 " for elem in tree.iter():\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003013 " id = elem.get('id')\n"
3014 " if id:\n"
3015 " ids[id] = elem\n"
3016 " return tree, ids\n"
3017 "cElementTree.XMLID = XMLID\n"
3018
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003019 "try:\n"
3020 " register_namespace = ET.register_namespace\n"
3021 "except AttributeError:\n"
3022 " def register_namespace(prefix, uri):\n"
3023 " ET._namespace_map[uri] = prefix\n"
3024 "cElementTree.register_namespace = register_namespace\n"
3025
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 "cElementTree.dump = ET.dump\n"
3027 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
3028 "cElementTree.iselement = ET.iselement\n"
3029 "cElementTree.QName = ET.QName\n"
3030 "cElementTree.tostring = ET.tostring\n"
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003031 "cElementTree.fromstringlist = ET.fromstringlist\n"
3032 "cElementTree.tostringlist = ET.tostringlist\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 "cElementTree.VERSION = '" VERSION "'\n"
3034 "cElementTree.__version__ = '" VERSION "'\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035
3036 );
3037
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003038 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3039 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040
3041 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
3042
3043 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
3044 if (elementtree_copyelement_obj) {
3045 /* reduce hack needed; enable reduce method */
3046 PyMethodDef* mp;
3047 for (mp = element_methods; mp->ml_name; mp++)
3048 if (mp->ml_meth == (PyCFunction) element_reduce) {
3049 mp->ml_name = "__reduce__";
3050 break;
3051 }
3052 } else
3053 PyErr_Clear();
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003054
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003056 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3057 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058
3059#if defined(USE_PYEXPAT_CAPI)
3060 /* link against pyexpat, if possible */
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003061 expat_capi = PyCObject_Import("pyexpat", "expat_CAPI");
3062 if (expat_capi) {
3063 /* check that it's usable */
3064 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3065 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3066 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3067 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3068 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3069 expat_capi = NULL;
3070 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071#endif
3072
Florent Xicluna3e8c1892010-03-11 14:36:19 +00003073 elementtree_parseerror_obj = PyErr_NewException(
3074 "cElementTree.ParseError", PyExc_SyntaxError, NULL
3075 );
3076 Py_INCREF(elementtree_parseerror_obj);
3077 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078}