blob: e8309df2997f0edb948b019cb78a736b33268be3 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
51
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Define this to route all expat calls through the expat library that
   is embedded in pyexpat. */
/* #define USE_PYEXPAT_CAPI */

/* Number of child slots stored inline in an element's extra block (the
   _children array).  Elements with no more children than this need no
   separately allocated child buffer. */
#define STATIC_CHILDREN 4
66
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71
72/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010073 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000074 that the number of children should be an even number, at least on
75 32-bit platforms. */
76
77/* -------------------------------------------------------------------- */
78
/* Optional allocation tracing.  With the condition below switched on,
   ALLOC and RELEASE keep a running byte count in `memory` and print it
   together with the given comment; otherwise both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
89
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`; MSVC builds additionally ask for inlining and the __fastcall
   calling convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
96
/* The text and tail slots of an element keep a 'join' flag in the lowest
   bit of the stored pointer.  Any use of those slots as object pointers
   must therefore go through JOIN_OBJ; see the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)       -- the object pointer with the flag bit masked off
   JOIN_GET(p)       -- the flag bit (0 or 1)
   JOIN_SET(p, flag) -- the pointer of p combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
104
105/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000106static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000107static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000108static PyObject* elementtree_iter_obj;
109static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000110static PyObject* elementpath_obj;
111
112/* helpers */
113
114LOCAL(PyObject*)
115deepcopy(PyObject* object, PyObject* memo)
116{
117 /* do a deep copy of the given object */
118
119 PyObject* args;
120 PyObject* result;
121
122 if (!elementtree_deepcopy_obj) {
123 PyErr_SetString(
124 PyExc_RuntimeError,
125 "deepcopy helper not found"
126 );
127 return NULL;
128 }
129
130 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000131 if (!args)
132 return NULL;
133
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000134 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
135 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
136
137 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
138
139 Py_DECREF(args);
140
141 return result;
142}
143
144LOCAL(PyObject*)
145list_join(PyObject* list)
146{
147 /* join list elements (destroying the list in the process) */
148
149 PyObject* joiner;
150 PyObject* function;
151 PyObject* args;
152 PyObject* result;
153
154 switch (PyList_GET_SIZE(list)) {
155 case 0:
156 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000157 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 case 1:
159 result = PyList_GET_ITEM(list, 0);
160 Py_INCREF(result);
161 Py_DECREF(list);
162 return result;
163 }
164
165 /* two or more elements: slice out a suitable separator from the
166 first member, and use that to join the entire list */
167
168 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
169 if (!joiner)
170 return NULL;
171
172 function = PyObject_GetAttrString(joiner, "join");
173 if (!function) {
174 Py_DECREF(joiner);
175 return NULL;
176 }
177
178 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000179 if (!args)
180 return NULL;
181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000182 PyTuple_SET_ITEM(args, 0, list);
183
184 result = PyObject_CallObject(function, args);
185
186 Py_DECREF(args); /* also removes list */
187 Py_DECREF(function);
188 Py_DECREF(joiner);
189
190 return result;
191}
192
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000193/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200194/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000195
196typedef struct {
197
198 /* attributes (a dictionary object), or None if no attributes */
199 PyObject* attrib;
200
201 /* child elements */
202 int length; /* actual number of items */
203 int allocated; /* allocated items */
204
205 /* this either points to _children or to a malloced buffer */
206 PyObject* *children;
207
208 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210} ElementObjectExtra;
211
212typedef struct {
213 PyObject_HEAD
214
215 /* element tag (a string). */
216 PyObject* tag;
217
218 /* text before first child. note that this is a tagged pointer;
219 use JOIN_OBJ to get the object pointer. the join flag is used
220 to distinguish lists created by the tree builder from lists
221 assigned to the attribute by application code; the former
222 should be joined before being returned to the user, the latter
223 should be left intact. */
224 PyObject* text;
225
226 /* text after this element, in parent. note that this is a tagged
227 pointer; use JOIN_OBJ to get the object pointer. */
228 PyObject* tail;
229
230 ElementObjectExtra* extra;
231
232} ElementObject;
233
Neal Norwitz227b5332006-03-22 09:28:35 +0000234static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235
Christian Heimes90aa7642007-12-19 02:45:37 +0000236#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237
238/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200239/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243{
244 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
245 if (!self->extra)
246 return -1;
247
248 if (!attrib)
249 attrib = Py_None;
250
251 Py_INCREF(attrib);
252 self->extra->attrib = attrib;
253
254 self->extra->length = 0;
255 self->extra->allocated = STATIC_CHILDREN;
256 self->extra->children = self->extra->_children;
257
258 return 0;
259}
260
261LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 int i;
265
266 Py_DECREF(self->extra->attrib);
267
268 for (i = 0; i < self->extra->length; i++)
269 Py_DECREF(self->extra->children[i]);
270
271 if (self->extra->children != self->extra->_children)
272 PyObject_Free(self->extra->children);
273
274 PyObject_Free(self->extra);
275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
285 self = PyObject_New(ElementObject, &Element_Type);
286 if (self == NULL)
287 return NULL;
288
289 /* use None for empty dictionaries */
290 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
291 attrib = Py_None;
292
293 self->extra = NULL;
294
295 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200296 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000297 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000299 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000300 }
301
302 Py_INCREF(tag);
303 self->tag = tag;
304
305 Py_INCREF(Py_None);
306 self->text = Py_None;
307
308 Py_INCREF(Py_None);
309 self->tail = Py_None;
310
311 ALLOC(sizeof(ElementObject), "create element");
312
313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 }
332 return (PyObject *)e;
333}
334
335static int
336element_init(PyObject *self, PyObject *args, PyObject *kwds)
337{
338 PyObject *tag;
339 PyObject *tmp;
340 PyObject *attrib = NULL;
341 ElementObject *self_elem;
342
343 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
344 return -1;
345
346 if (attrib || kwds) {
347 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
348 if (!attrib)
349 return -1;
350 if (kwds)
351 PyDict_Update(attrib, kwds);
352 } else {
353 Py_INCREF(Py_None);
354 attrib = Py_None;
355 }
356
357 self_elem = (ElementObject *)self;
358
359 /* Use None for empty dictionaries */
360 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
361 Py_INCREF(Py_None);
362 attrib = Py_None;
363 }
364
365 if (attrib != Py_None) {
366 if (create_extra(self_elem, attrib) < 0) {
367 PyObject_Del(self_elem);
368 return -1;
369 }
370 }
371
372 /* If create_extra needed attrib, it took a reference to it, so we can
373 * release ours anyway.
374 */
375 Py_DECREF(attrib);
376
377 /* Replace the objects already pointed to by tag, text and tail. */
378 tmp = self_elem->tag;
379 self_elem->tag = tag;
380 Py_INCREF(tag);
381 Py_DECREF(tmp);
382
383 tmp = self_elem->text;
384 self_elem->text = Py_None;
385 Py_INCREF(Py_None);
386 Py_DECREF(JOIN_OBJ(tmp));
387
388 tmp = self_elem->tail;
389 self_elem->tail = Py_None;
390 Py_INCREF(Py_None);
391 Py_DECREF(JOIN_OBJ(tmp));
392
393 return 0;
394}
395
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000396LOCAL(int)
397element_resize(ElementObject* self, int extra)
398{
399 int size;
400 PyObject* *children;
401
402 /* make sure self->children can hold the given number of extra
403 elements. set an exception and return -1 if allocation failed */
404
405 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000407
408 size = self->extra->length + extra;
409
410 if (size > self->extra->allocated) {
411 /* use Python 2.4's list growth strategy */
412 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000413 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100414 * which needs at least 4 bytes.
415 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000416 * be safe.
417 */
418 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000420 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100421 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000422 * false alarm always assume at least one child to be safe.
423 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000424 children = PyObject_Realloc(self->extra->children,
425 size * sizeof(PyObject*));
426 if (!children)
427 goto nomemory;
428 } else {
429 children = PyObject_Malloc(size * sizeof(PyObject*));
430 if (!children)
431 goto nomemory;
432 /* copy existing children from static area to malloc buffer */
433 memcpy(children, self->extra->children,
434 self->extra->length * sizeof(PyObject*));
435 }
436 self->extra->children = children;
437 self->extra->allocated = size;
438 }
439
440 return 0;
441
442 nomemory:
443 PyErr_NoMemory();
444 return -1;
445}
446
447LOCAL(int)
448element_add_subelement(ElementObject* self, PyObject* element)
449{
450 /* add a child element to a parent */
451
452 if (element_resize(self, 1) < 0)
453 return -1;
454
455 Py_INCREF(element);
456 self->extra->children[self->extra->length] = element;
457
458 self->extra->length++;
459
460 return 0;
461}
462
463LOCAL(PyObject*)
464element_get_attrib(ElementObject* self)
465{
466 /* return borrowed reference to attrib dictionary */
467 /* note: this function assumes that the extra section exists */
468
469 PyObject* res = self->extra->attrib;
470
471 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 /* create missing dictionary */
474 res = PyDict_New();
475 if (!res)
476 return NULL;
477 self->extra->attrib = res;
478 }
479
480 return res;
481}
482
483LOCAL(PyObject*)
484element_get_text(ElementObject* self)
485{
486 /* return borrowed reference to text attribute */
487
488 PyObject* res = self->text;
489
490 if (JOIN_GET(res)) {
491 res = JOIN_OBJ(res);
492 if (PyList_CheckExact(res)) {
493 res = list_join(res);
494 if (!res)
495 return NULL;
496 self->text = res;
497 }
498 }
499
500 return res;
501}
502
503LOCAL(PyObject*)
504element_get_tail(ElementObject* self)
505{
506 /* return borrowed reference to text attribute */
507
508 PyObject* res = self->tail;
509
510 if (JOIN_GET(res)) {
511 res = JOIN_OBJ(res);
512 if (PyList_CheckExact(res)) {
513 res = list_join(res);
514 if (!res)
515 return NULL;
516 self->tail = res;
517 }
518 }
519
520 return res;
521}
522
523static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524subelement(PyObject* self, PyObject* args, PyObject* kw)
525{
526 PyObject* elem;
527
528 ElementObject* parent;
529 PyObject* tag;
530 PyObject* attrib = NULL;
531 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
532 &Element_Type, &parent, &tag,
533 &PyDict_Type, &attrib))
534 return NULL;
535
536 if (attrib || kw) {
537 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
538 if (!attrib)
539 return NULL;
540 if (kw)
541 PyDict_Update(attrib, kw);
542 } else {
543 Py_INCREF(Py_None);
544 attrib = Py_None;
545 }
546
Eli Bendersky092af1f2012-03-04 07:14:03 +0200547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548
549 Py_DECREF(attrib);
550
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000551 if (element_add_subelement(parent, elem) < 0) {
552 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555
556 return elem;
557}
558
559static void
560element_dealloc(ElementObject* self)
561{
562 if (self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200563 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000564
565 /* discard attributes */
566 Py_DECREF(self->tag);
567 Py_DECREF(JOIN_OBJ(self->text));
568 Py_DECREF(JOIN_OBJ(self->tail));
569
570 RELEASE(sizeof(ElementObject), "destroy element");
571
Eli Bendersky092af1f2012-03-04 07:14:03 +0200572 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573}
574
575/* -------------------------------------------------------------------- */
576/* methods (in alphabetical order) */
577
578static PyObject*
579element_append(ElementObject* self, PyObject* args)
580{
581 PyObject* element;
582 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
583 return NULL;
584
585 if (element_add_subelement(self, element) < 0)
586 return NULL;
587
588 Py_RETURN_NONE;
589}
590
591static PyObject*
592element_clear(ElementObject* self, PyObject* args)
593{
594 if (!PyArg_ParseTuple(args, ":clear"))
595 return NULL;
596
597 if (self->extra) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 self->extra = NULL;
600 }
601
602 Py_INCREF(Py_None);
603 Py_DECREF(JOIN_OBJ(self->text));
604 self->text = Py_None;
605
606 Py_INCREF(Py_None);
607 Py_DECREF(JOIN_OBJ(self->tail));
608 self->tail = Py_None;
609
610 Py_RETURN_NONE;
611}
612
613static PyObject*
614element_copy(ElementObject* self, PyObject* args)
615{
616 int i;
617 ElementObject* element;
618
619 if (!PyArg_ParseTuple(args, ":__copy__"))
620 return NULL;
621
Eli Bendersky092af1f2012-03-04 07:14:03 +0200622 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 self->tag, (self->extra) ? self->extra->attrib : Py_None
624 );
625 if (!element)
626 return NULL;
627
628 Py_DECREF(JOIN_OBJ(element->text));
629 element->text = self->text;
630 Py_INCREF(JOIN_OBJ(element->text));
631
632 Py_DECREF(JOIN_OBJ(element->tail));
633 element->tail = self->tail;
634 Py_INCREF(JOIN_OBJ(element->tail));
635
636 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100637
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000638 if (element_resize(element, self->extra->length) < 0) {
639 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000641 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 for (i = 0; i < self->extra->length; i++) {
644 Py_INCREF(self->extra->children[i]);
645 element->extra->children[i] = self->extra->children[i];
646 }
647
648 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100649
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650 }
651
652 return (PyObject*) element;
653}
654
655static PyObject*
656element_deepcopy(ElementObject* self, PyObject* args)
657{
658 int i;
659 ElementObject* element;
660 PyObject* tag;
661 PyObject* attrib;
662 PyObject* text;
663 PyObject* tail;
664 PyObject* id;
665
666 PyObject* memo;
667 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
668 return NULL;
669
670 tag = deepcopy(self->tag, memo);
671 if (!tag)
672 return NULL;
673
674 if (self->extra) {
675 attrib = deepcopy(self->extra->attrib, memo);
676 if (!attrib) {
677 Py_DECREF(tag);
678 return NULL;
679 }
680 } else {
681 Py_INCREF(Py_None);
682 attrib = Py_None;
683 }
684
Eli Bendersky092af1f2012-03-04 07:14:03 +0200685 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_DECREF(tag);
688 Py_DECREF(attrib);
689
690 if (!element)
691 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 text = deepcopy(JOIN_OBJ(self->text), memo);
694 if (!text)
695 goto error;
696 Py_DECREF(element->text);
697 element->text = JOIN_SET(text, JOIN_GET(self->text));
698
699 tail = deepcopy(JOIN_OBJ(self->tail), memo);
700 if (!tail)
701 goto error;
702 Py_DECREF(element->tail);
703 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
704
705 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (element_resize(element, self->extra->length) < 0)
708 goto error;
709
710 for (i = 0; i < self->extra->length; i++) {
711 PyObject* child = deepcopy(self->extra->children[i], memo);
712 if (!child) {
713 element->extra->length = i;
714 goto error;
715 }
716 element->extra->children[i] = child;
717 }
718
719 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 }
722
723 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000724 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000725 if (!id)
726 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727
728 i = PyDict_SetItem(memo, id, (PyObject*) element);
729
730 Py_DECREF(id);
731
732 if (i < 0)
733 goto error;
734
735 return (PyObject*) element;
736
737 error:
738 Py_DECREF(element);
739 return NULL;
740}
741
742LOCAL(int)
743checkpath(PyObject* tag)
744{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000745 Py_ssize_t i;
746 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747
748 /* check if a tag contains an xpath character */
749
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750#define PATHCHAR(ch) \
751 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
755 void *data = PyUnicode_DATA(tag);
756 unsigned int kind = PyUnicode_KIND(tag);
757 for (i = 0; i < len; i++) {
758 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
759 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 return 1;
765 }
766 return 0;
767 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(tag)) {
769 char *p = PyBytes_AS_STRING(tag);
770 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 if (p[i] == '{')
772 check = 0;
773 else if (p[i] == '}')
774 check = 1;
775 else if (check && PATHCHAR(p[i]))
776 return 1;
777 }
778 return 0;
779 }
780
781 return 1; /* unknown type; might be path expression */
782}
783
784static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000785element_extend(ElementObject* self, PyObject* args)
786{
787 PyObject* seq;
788 Py_ssize_t i, seqlen = 0;
789
790 PyObject* seq_in;
791 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
792 return NULL;
793
794 seq = PySequence_Fast(seq_in, "");
795 if (!seq) {
796 PyErr_Format(
797 PyExc_TypeError,
798 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
799 );
800 return NULL;
801 }
802
803 seqlen = PySequence_Size(seq);
804 for (i = 0; i < seqlen; i++) {
805 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200806 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
807 Py_DECREF(seq);
808 PyErr_Format(
809 PyExc_TypeError,
810 "expected an Element, not \"%.200s\"",
811 Py_TYPE(element)->tp_name);
812 return NULL;
813 }
814
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000815 if (element_add_subelement(self, element) < 0) {
816 Py_DECREF(seq);
817 return NULL;
818 }
819 }
820
821 Py_DECREF(seq);
822
823 Py_RETURN_NONE;
824}
825
826static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827element_find(ElementObject* self, PyObject* args)
828{
829 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000831 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200832
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000833 if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000834 return NULL;
835
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200836 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200837 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200838 return _PyObject_CallMethodId(
839 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000840 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200841 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000842
843 if (!self->extra)
844 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100845
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 for (i = 0; i < self->extra->length; i++) {
847 PyObject* item = self->extra->children[i];
848 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000849 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850 Py_INCREF(item);
851 return item;
852 }
853 }
854
855 Py_RETURN_NONE;
856}
857
858static PyObject*
859element_findtext(ElementObject* self, PyObject* args)
860{
861 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000862 PyObject* tag;
863 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000864 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200865 _Py_IDENTIFIER(findtext);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200866
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000867 if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868 return NULL;
869
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000870 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200871 return _PyObject_CallMethodId(
872 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000873 );
874
875 if (!self->extra) {
876 Py_INCREF(default_value);
877 return default_value;
878 }
879
880 for (i = 0; i < self->extra->length; i++) {
881 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000882 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
883
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000884 PyObject* text = element_get_text(item);
885 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000886 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000887 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000888 return text;
889 }
890 }
891
892 Py_INCREF(default_value);
893 return default_value;
894}
895
896static PyObject*
897element_findall(ElementObject* self, PyObject* args)
898{
899 int i;
900 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000901 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000902 PyObject* namespaces = Py_None;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000904 if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 return NULL;
906
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200907 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200908 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200909 return _PyObject_CallMethodId(
910 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000911 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200912 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000913
914 out = PyList_New(0);
915 if (!out)
916 return NULL;
917
918 if (!self->extra)
919 return out;
920
921 for (i = 0; i < self->extra->length; i++) {
922 PyObject* item = self->extra->children[i];
923 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000924 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000925 if (PyList_Append(out, item) < 0) {
926 Py_DECREF(out);
927 return NULL;
928 }
929 }
930 }
931
932 return out;
933}
934
935static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000936element_iterfind(ElementObject* self, PyObject* args)
937{
938 PyObject* tag;
939 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200940 _Py_IDENTIFIER(iterfind);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200941
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000942 if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
943 return NULL;
944
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200945 return _PyObject_CallMethodId(
946 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000947 );
948}
949
950static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951element_get(ElementObject* self, PyObject* args)
952{
953 PyObject* value;
954
955 PyObject* key;
956 PyObject* default_value = Py_None;
957 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
958 return NULL;
959
960 if (!self->extra || self->extra->attrib == Py_None)
961 value = default_value;
962 else {
963 value = PyDict_GetItem(self->extra->attrib, key);
964 if (!value)
965 value = default_value;
966 }
967
968 Py_INCREF(value);
969 return value;
970}
971
972static PyObject*
973element_getchildren(ElementObject* self, PyObject* args)
974{
975 int i;
976 PyObject* list;
977
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000978 /* FIXME: report as deprecated? */
979
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 if (!PyArg_ParseTuple(args, ":getchildren"))
981 return NULL;
982
983 if (!self->extra)
984 return PyList_New(0);
985
986 list = PyList_New(self->extra->length);
987 if (!list)
988 return NULL;
989
990 for (i = 0; i < self->extra->length; i++) {
991 PyObject* item = self->extra->children[i];
992 Py_INCREF(item);
993 PyList_SET_ITEM(list, i, item);
994 }
995
996 return list;
997}
998
999static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001000element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001001{
1002 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001005 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 return NULL;
1007
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001008 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001009 PyErr_SetString(
1010 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001011 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001012 );
1013 return NULL;
1014 }
1015
1016 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001017 if (!args)
1018 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001019
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1021 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1022
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001023 result = PyObject_CallObject(elementtree_iter_obj, args);
1024
1025 Py_DECREF(args);
1026
1027 return result;
1028}
1029
1030
1031static PyObject*
1032element_itertext(ElementObject* self, PyObject* args)
1033{
1034 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001035
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001036 if (!PyArg_ParseTuple(args, ":itertext"))
1037 return NULL;
1038
1039 if (!elementtree_itertext_obj) {
1040 PyErr_SetString(
1041 PyExc_RuntimeError,
1042 "itertext helper not found"
1043 );
1044 return NULL;
1045 }
1046
1047 args = PyTuple_New(1);
1048 if (!args)
1049 return NULL;
1050
1051 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1052
1053 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001054
1055 Py_DECREF(args);
1056
1057 return result;
1058}
1059
1060static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001061element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001063 ElementObject* self = (ElementObject*) self_;
1064
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001065 if (!self->extra || index < 0 || index >= self->extra->length) {
1066 PyErr_SetString(
1067 PyExc_IndexError,
1068 "child index out of range"
1069 );
1070 return NULL;
1071 }
1072
1073 Py_INCREF(self->extra->children[index]);
1074 return self->extra->children[index];
1075}
1076
1077static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078element_insert(ElementObject* self, PyObject* args)
1079{
1080 int i;
1081
1082 int index;
1083 PyObject* element;
1084 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1085 &Element_Type, &element))
1086 return NULL;
1087
1088 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001089 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 if (index < 0) {
1092 index += self->extra->length;
1093 if (index < 0)
1094 index = 0;
1095 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096 if (index > self->extra->length)
1097 index = self->extra->length;
1098
1099 if (element_resize(self, 1) < 0)
1100 return NULL;
1101
1102 for (i = self->extra->length; i > index; i--)
1103 self->extra->children[i] = self->extra->children[i-1];
1104
1105 Py_INCREF(element);
1106 self->extra->children[index] = element;
1107
1108 self->extra->length++;
1109
1110 Py_RETURN_NONE;
1111}
1112
1113static PyObject*
1114element_items(ElementObject* self, PyObject* args)
1115{
1116 if (!PyArg_ParseTuple(args, ":items"))
1117 return NULL;
1118
1119 if (!self->extra || self->extra->attrib == Py_None)
1120 return PyList_New(0);
1121
1122 return PyDict_Items(self->extra->attrib);
1123}
1124
1125static PyObject*
1126element_keys(ElementObject* self, PyObject* args)
1127{
1128 if (!PyArg_ParseTuple(args, ":keys"))
1129 return NULL;
1130
1131 if (!self->extra || self->extra->attrib == Py_None)
1132 return PyList_New(0);
1133
1134 return PyDict_Keys(self->extra->attrib);
1135}
1136
Martin v. Löwis18e16552006-02-15 17:27:45 +00001137static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138element_length(ElementObject* self)
1139{
1140 if (!self->extra)
1141 return 0;
1142
1143 return self->extra->length;
1144}
1145
1146static PyObject*
1147element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1148{
1149 PyObject* elem;
1150
1151 PyObject* tag;
1152 PyObject* attrib;
1153 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1154 return NULL;
1155
1156 attrib = PyDict_Copy(attrib);
1157 if (!attrib)
1158 return NULL;
1159
Eli Bendersky092af1f2012-03-04 07:14:03 +02001160 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161
1162 Py_DECREF(attrib);
1163
1164 return elem;
1165}
1166
1167static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168element_remove(ElementObject* self, PyObject* args)
1169{
1170 int i;
1171
1172 PyObject* element;
1173 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1174 return NULL;
1175
1176 if (!self->extra) {
1177 /* element has no children, so raise exception */
1178 PyErr_SetString(
1179 PyExc_ValueError,
1180 "list.remove(x): x not in list"
1181 );
1182 return NULL;
1183 }
1184
1185 for (i = 0; i < self->extra->length; i++) {
1186 if (self->extra->children[i] == element)
1187 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001188 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189 break;
1190 }
1191
1192 if (i == self->extra->length) {
1193 /* element is not in children, so raise exception */
1194 PyErr_SetString(
1195 PyExc_ValueError,
1196 "list.remove(x): x not in list"
1197 );
1198 return NULL;
1199 }
1200
1201 Py_DECREF(self->extra->children[i]);
1202
1203 self->extra->length--;
1204
1205 for (; i < self->extra->length; i++)
1206 self->extra->children[i] = self->extra->children[i+1];
1207
1208 Py_RETURN_NONE;
1209}
1210
1211static PyObject*
1212element_repr(ElementObject* self)
1213{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001214 if (self->tag)
1215 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1216 else
1217 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218}
1219
1220static PyObject*
1221element_set(ElementObject* self, PyObject* args)
1222{
1223 PyObject* attrib;
1224
1225 PyObject* key;
1226 PyObject* value;
1227 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1228 return NULL;
1229
1230 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001231 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232
1233 attrib = element_get_attrib(self);
1234 if (!attrib)
1235 return NULL;
1236
1237 if (PyDict_SetItem(attrib, key, value) < 0)
1238 return NULL;
1239
1240 Py_RETURN_NONE;
1241}
1242
1243static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001244element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001246 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 int i;
1248 PyObject* old;
1249
1250 if (!self->extra || index < 0 || index >= self->extra->length) {
1251 PyErr_SetString(
1252 PyExc_IndexError,
1253 "child assignment index out of range");
1254 return -1;
1255 }
1256
1257 old = self->extra->children[index];
1258
1259 if (item) {
1260 Py_INCREF(item);
1261 self->extra->children[index] = item;
1262 } else {
1263 self->extra->length--;
1264 for (i = index; i < self->extra->length; i++)
1265 self->extra->children[i] = self->extra->children[i+1];
1266 }
1267
1268 Py_DECREF(old);
1269
1270 return 0;
1271}
1272
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001273static PyObject*
1274element_subscr(PyObject* self_, PyObject* item)
1275{
1276 ElementObject* self = (ElementObject*) self_;
1277
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001278 if (PyIndex_Check(item)) {
1279 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280
1281 if (i == -1 && PyErr_Occurred()) {
1282 return NULL;
1283 }
1284 if (i < 0 && self->extra)
1285 i += self->extra->length;
1286 return element_getitem(self_, i);
1287 }
1288 else if (PySlice_Check(item)) {
1289 Py_ssize_t start, stop, step, slicelen, cur, i;
1290 PyObject* list;
1291
1292 if (!self->extra)
1293 return PyList_New(0);
1294
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001295 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001296 self->extra->length,
1297 &start, &stop, &step, &slicelen) < 0) {
1298 return NULL;
1299 }
1300
1301 if (slicelen <= 0)
1302 return PyList_New(0);
1303 else {
1304 list = PyList_New(slicelen);
1305 if (!list)
1306 return NULL;
1307
1308 for (cur = start, i = 0; i < slicelen;
1309 cur += step, i++) {
1310 PyObject* item = self->extra->children[cur];
1311 Py_INCREF(item);
1312 PyList_SET_ITEM(list, i, item);
1313 }
1314
1315 return list;
1316 }
1317 }
1318 else {
1319 PyErr_SetString(PyExc_TypeError,
1320 "element indices must be integers");
1321 return NULL;
1322 }
1323}
1324
1325static int
1326element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1327{
1328 ElementObject* self = (ElementObject*) self_;
1329
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001330 if (PyIndex_Check(item)) {
1331 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001332
1333 if (i == -1 && PyErr_Occurred()) {
1334 return -1;
1335 }
1336 if (i < 0 && self->extra)
1337 i += self->extra->length;
1338 return element_setitem(self_, i, value);
1339 }
1340 else if (PySlice_Check(item)) {
1341 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1342
1343 PyObject* recycle = NULL;
1344 PyObject* seq = NULL;
1345
1346 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001347 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001348
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001349 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001350 self->extra->length,
1351 &start, &stop, &step, &slicelen) < 0) {
1352 return -1;
1353 }
1354
Eli Bendersky865756a2012-03-09 13:38:15 +02001355 if (value == NULL) {
1356 /* Delete slice */
1357 size_t cur;
1358 Py_ssize_t i;
1359
1360 if (slicelen <= 0)
1361 return 0;
1362
1363 /* Since we're deleting, the direction of the range doesn't matter,
1364 * so for simplicity make it always ascending.
1365 */
1366 if (step < 0) {
1367 stop = start + 1;
1368 start = stop + step * (slicelen - 1) - 1;
1369 step = -step;
1370 }
1371
1372 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1373
1374 /* recycle is a list that will contain all the children
1375 * scheduled for removal.
1376 */
1377 if (!(recycle = PyList_New(slicelen))) {
1378 PyErr_NoMemory();
1379 return -1;
1380 }
1381
1382 /* This loop walks over all the children that have to be deleted,
1383 * with cur pointing at them. num_moved is the amount of children
1384 * until the next deleted child that have to be "shifted down" to
1385 * occupy the deleted's places.
1386 * Note that in the ith iteration, shifting is done i+i places down
1387 * because i children were already removed.
1388 */
1389 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1390 /* Compute how many children have to be moved, clipping at the
1391 * list end.
1392 */
1393 Py_ssize_t num_moved = step - 1;
1394 if (cur + step >= (size_t)self->extra->length) {
1395 num_moved = self->extra->length - cur - 1;
1396 }
1397
1398 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1399
1400 memmove(
1401 self->extra->children + cur - i,
1402 self->extra->children + cur + 1,
1403 num_moved * sizeof(PyObject *));
1404 }
1405
1406 /* Leftover "tail" after the last removed child */
1407 cur = start + (size_t)slicelen * step;
1408 if (cur < (size_t)self->extra->length) {
1409 memmove(
1410 self->extra->children + cur - slicelen,
1411 self->extra->children + cur,
1412 (self->extra->length - cur) * sizeof(PyObject *));
1413 }
1414
1415 self->extra->length -= slicelen;
1416
1417 /* Discard the recycle list with all the deleted sub-elements */
1418 Py_XDECREF(recycle);
1419 return 0;
1420 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001421 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001422 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423 seq = PySequence_Fast(value, "");
1424 if (!seq) {
1425 PyErr_Format(
1426 PyExc_TypeError,
1427 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1428 );
1429 return -1;
1430 }
1431 newlen = PySequence_Size(seq);
1432 }
1433
1434 if (step != 1 && newlen != slicelen)
1435 {
1436 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001437 "attempt to assign sequence of size %zd "
1438 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001439 newlen, slicelen
1440 );
1441 return -1;
1442 }
1443
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001444 /* Resize before creating the recycle bin, to prevent refleaks. */
1445 if (newlen > slicelen) {
1446 if (element_resize(self, newlen - slicelen) < 0) {
1447 if (seq) {
1448 Py_DECREF(seq);
1449 }
1450 return -1;
1451 }
1452 }
1453
1454 if (slicelen > 0) {
1455 /* to avoid recursive calls to this method (via decref), move
1456 old items to the recycle bin here, and get rid of them when
1457 we're done modifying the element */
1458 recycle = PyList_New(slicelen);
1459 if (!recycle) {
1460 if (seq) {
1461 Py_DECREF(seq);
1462 }
1463 return -1;
1464 }
1465 for (cur = start, i = 0; i < slicelen;
1466 cur += step, i++)
1467 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1468 }
1469
1470 if (newlen < slicelen) {
1471 /* delete slice */
1472 for (i = stop; i < self->extra->length; i++)
1473 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1474 } else if (newlen > slicelen) {
1475 /* insert slice */
1476 for (i = self->extra->length-1; i >= stop; i--)
1477 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1478 }
1479
1480 /* replace the slice */
1481 for (cur = start, i = 0; i < newlen;
1482 cur += step, i++) {
1483 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1484 Py_INCREF(element);
1485 self->extra->children[cur] = element;
1486 }
1487
1488 self->extra->length += newlen - slicelen;
1489
1490 if (seq) {
1491 Py_DECREF(seq);
1492 }
1493
1494 /* discard the recycle bin, and everything in it */
1495 Py_XDECREF(recycle);
1496
1497 return 0;
1498 }
1499 else {
1500 PyErr_SetString(PyExc_TypeError,
1501 "element indices must be integers");
1502 return -1;
1503 }
1504}
1505
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001506static PyMethodDef element_methods[] = {
1507
1508 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1509
1510 {"get", (PyCFunction) element_get, METH_VARARGS},
1511 {"set", (PyCFunction) element_set, METH_VARARGS},
1512
1513 {"find", (PyCFunction) element_find, METH_VARARGS},
1514 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1515 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1516
1517 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001518 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1520 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1521
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001522 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1523 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
1524 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS},
1525
1526 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1528
1529 {"items", (PyCFunction) element_items, METH_VARARGS},
1530 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1531
1532 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1533
1534 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1535 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1536
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537 {NULL, NULL}
1538};
1539
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001540static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001541element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542{
1543 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001544 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001546 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001547 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001548
Alexander Belopolskye239d232010-12-08 23:31:48 +00001549 if (name == NULL)
1550 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552 /* handle common attributes first */
1553 if (strcmp(name, "tag") == 0) {
1554 res = self->tag;
1555 Py_INCREF(res);
1556 return res;
1557 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559 Py_INCREF(res);
1560 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561 }
1562
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001563 /* methods */
1564 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1565 if (res)
1566 return res;
1567
1568 /* less common attributes */
1569 if (strcmp(name, "tail") == 0) {
1570 PyErr_Clear();
1571 res = element_get_tail(self);
1572 } else if (strcmp(name, "attrib") == 0) {
1573 PyErr_Clear();
1574 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001575 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001576 res = element_get_attrib(self);
1577 }
1578
1579 if (!res)
1580 return NULL;
1581
1582 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 return res;
1584}
1585
1586static int
1587element_setattr(ElementObject* self, const char* name, PyObject* value)
1588{
1589 if (value == NULL) {
1590 PyErr_SetString(
1591 PyExc_AttributeError,
1592 "can't delete element attributes"
1593 );
1594 return -1;
1595 }
1596
1597 if (strcmp(name, "tag") == 0) {
1598 Py_DECREF(self->tag);
1599 self->tag = value;
1600 Py_INCREF(self->tag);
1601 } else if (strcmp(name, "text") == 0) {
1602 Py_DECREF(JOIN_OBJ(self->text));
1603 self->text = value;
1604 Py_INCREF(self->text);
1605 } else if (strcmp(name, "tail") == 0) {
1606 Py_DECREF(JOIN_OBJ(self->tail));
1607 self->tail = value;
1608 Py_INCREF(self->tail);
1609 } else if (strcmp(name, "attrib") == 0) {
1610 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001611 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 Py_DECREF(self->extra->attrib);
1613 self->extra->attrib = value;
1614 Py_INCREF(self->extra->attrib);
1615 } else {
1616 PyErr_SetString(PyExc_AttributeError, name);
1617 return -1;
1618 }
1619
1620 return 0;
1621}
1622
1623static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001624 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625 0, /* sq_concat */
1626 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001627 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001628 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001629 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001630 0,
1631};
1632
1633static PyMappingMethods element_as_mapping = {
1634 (lenfunc) element_length,
1635 (binaryfunc) element_subscr,
1636 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637};
1638
Neal Norwitz227b5332006-03-22 09:28:35 +00001639static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001640 PyVarObject_HEAD_INIT(NULL, 0)
1641 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001643 (destructor)element_dealloc, /* tp_dealloc */
1644 0, /* tp_print */
1645 0, /* tp_getattr */
1646 (setattrfunc)element_setattr, /* tp_setattr */
1647 0, /* tp_reserved */
1648 (reprfunc)element_repr, /* tp_repr */
1649 0, /* tp_as_number */
1650 &element_as_sequence, /* tp_as_sequence */
1651 &element_as_mapping, /* tp_as_mapping */
1652 0, /* tp_hash */
1653 0, /* tp_call */
1654 0, /* tp_str */
1655 (getattrofunc)element_getattro, /* tp_getattro */
1656 0, /* tp_setattro */
1657 0, /* tp_as_buffer */
1658 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1659 0, /* tp_doc */
1660 0, /* tp_traverse */
1661 0, /* tp_clear */
1662 0, /* tp_richcompare */
1663 0, /* tp_weaklistoffset */
1664 0, /* tp_iter */
1665 0, /* tp_iternext */
1666 element_methods, /* tp_methods */
1667 0, /* tp_members */
1668 0, /* tp_getset */
1669 0, /* tp_base */
1670 0, /* tp_dict */
1671 0, /* tp_descr_get */
1672 0, /* tp_descr_set */
1673 0, /* tp_dictoffset */
1674 (initproc)element_init, /* tp_init */
1675 PyType_GenericAlloc, /* tp_alloc */
1676 element_new, /* tp_new */
1677 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678};
1679
1680/* ==================================================================== */
1681/* the tree builder type */
1682
1683typedef struct {
1684 PyObject_HEAD
1685
1686 PyObject* root; /* root node (first created node) */
1687
1688 ElementObject* this; /* current node */
1689 ElementObject* last; /* most recently created node */
1690
1691 PyObject* data; /* data collector (string or list), or NULL */
1692
1693 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001694 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695
1696 /* element tracing */
1697 PyObject* events; /* list of events, or NULL if not collecting */
1698 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1699 PyObject* end_event_obj;
1700 PyObject* start_ns_event_obj;
1701 PyObject* end_ns_event_obj;
1702
1703} TreeBuilderObject;
1704
Neal Norwitz227b5332006-03-22 09:28:35 +00001705static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706
Christian Heimes90aa7642007-12-19 02:45:37 +00001707#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708
1709/* -------------------------------------------------------------------- */
1710/* constructor and destructor */
1711
1712LOCAL(PyObject*)
1713treebuilder_new(void)
1714{
1715 TreeBuilderObject* self;
1716
1717 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1718 if (self == NULL)
1719 return NULL;
1720
1721 self->root = NULL;
1722
1723 Py_INCREF(Py_None);
1724 self->this = (ElementObject*) Py_None;
1725
1726 Py_INCREF(Py_None);
1727 self->last = (ElementObject*) Py_None;
1728
1729 self->data = NULL;
1730
1731 self->stack = PyList_New(20);
1732 self->index = 0;
1733
1734 self->events = NULL;
1735 self->start_event_obj = self->end_event_obj = NULL;
1736 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1737
1738 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1739
1740 return (PyObject*) self;
1741}
1742
1743static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001744treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001745{
1746 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1747 return NULL;
1748
1749 return treebuilder_new();
1750}
1751
1752static void
1753treebuilder_dealloc(TreeBuilderObject* self)
1754{
1755 Py_XDECREF(self->end_ns_event_obj);
1756 Py_XDECREF(self->start_ns_event_obj);
1757 Py_XDECREF(self->end_event_obj);
1758 Py_XDECREF(self->start_event_obj);
1759 Py_XDECREF(self->events);
1760 Py_DECREF(self->stack);
1761 Py_XDECREF(self->data);
1762 Py_DECREF(self->last);
1763 Py_DECREF(self->this);
1764 Py_XDECREF(self->root);
1765
1766 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1767
1768 PyObject_Del(self);
1769}
1770
1771/* -------------------------------------------------------------------- */
1772/* handlers */
1773
1774LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001775treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1776 PyObject* attrib)
1777{
1778 PyObject* node;
1779 PyObject* this;
1780
1781 if (self->data) {
1782 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001783 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784 self->last->text = JOIN_SET(
1785 self->data, PyList_CheckExact(self->data)
1786 );
1787 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001788 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 self->last->tail = JOIN_SET(
1790 self->data, PyList_CheckExact(self->data)
1791 );
1792 }
1793 self->data = NULL;
1794 }
1795
Eli Bendersky092af1f2012-03-04 07:14:03 +02001796 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001797 if (!node)
1798 return NULL;
1799
1800 this = (PyObject*) self->this;
1801
1802 if (this != Py_None) {
1803 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 } else {
1806 if (self->root) {
1807 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001808 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001809 "multiple elements on top level"
1810 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001811 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001812 }
1813 Py_INCREF(node);
1814 self->root = node;
1815 }
1816
1817 if (self->index < PyList_GET_SIZE(self->stack)) {
1818 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001819 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001820 Py_INCREF(this);
1821 } else {
1822 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001823 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001824 }
1825 self->index++;
1826
1827 Py_DECREF(this);
1828 Py_INCREF(node);
1829 self->this = (ElementObject*) node;
1830
1831 Py_DECREF(self->last);
1832 Py_INCREF(node);
1833 self->last = (ElementObject*) node;
1834
1835 if (self->start_event_obj) {
1836 PyObject* res;
1837 PyObject* action = self->start_event_obj;
1838 res = PyTuple_New(2);
1839 if (res) {
1840 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1841 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1842 PyList_Append(self->events, res);
1843 Py_DECREF(res);
1844 } else
1845 PyErr_Clear(); /* FIXME: propagate error */
1846 }
1847
1848 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001849
1850 error:
1851 Py_DECREF(node);
1852 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853}
1854
1855LOCAL(PyObject*)
1856treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1857{
1858 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001859 if (self->last == (ElementObject*) Py_None) {
1860 /* ignore calls to data before the first call to start */
1861 Py_RETURN_NONE;
1862 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001863 /* store the first item as is */
1864 Py_INCREF(data); self->data = data;
1865 } else {
1866 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001867 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1868 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001869 /* expat often generates single character data sections; handle
1870 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001871 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1872 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001873 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001874 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001875 } else if (PyList_CheckExact(self->data)) {
1876 if (PyList_Append(self->data, data) < 0)
1877 return NULL;
1878 } else {
1879 PyObject* list = PyList_New(2);
1880 if (!list)
1881 return NULL;
1882 PyList_SET_ITEM(list, 0, self->data);
1883 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1884 self->data = list;
1885 }
1886 }
1887
1888 Py_RETURN_NONE;
1889}
1890
1891LOCAL(PyObject*)
1892treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1893{
1894 PyObject* item;
1895
1896 if (self->data) {
1897 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001898 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001899 self->last->text = JOIN_SET(
1900 self->data, PyList_CheckExact(self->data)
1901 );
1902 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001903 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 self->last->tail = JOIN_SET(
1905 self->data, PyList_CheckExact(self->data)
1906 );
1907 }
1908 self->data = NULL;
1909 }
1910
1911 if (self->index == 0) {
1912 PyErr_SetString(
1913 PyExc_IndexError,
1914 "pop from empty stack"
1915 );
1916 return NULL;
1917 }
1918
1919 self->index--;
1920
1921 item = PyList_GET_ITEM(self->stack, self->index);
1922 Py_INCREF(item);
1923
1924 Py_DECREF(self->last);
1925
1926 self->last = (ElementObject*) self->this;
1927 self->this = (ElementObject*) item;
1928
1929 if (self->end_event_obj) {
1930 PyObject* res;
1931 PyObject* action = self->end_event_obj;
1932 PyObject* node = (PyObject*) self->last;
1933 res = PyTuple_New(2);
1934 if (res) {
1935 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1936 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1937 PyList_Append(self->events, res);
1938 Py_DECREF(res);
1939 } else
1940 PyErr_Clear(); /* FIXME: propagate error */
1941 }
1942
1943 Py_INCREF(self->last);
1944 return (PyObject*) self->last;
1945}
1946
1947LOCAL(void)
1948treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001949 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001950{
1951 PyObject* res;
1952 PyObject* action;
1953 PyObject* parcel;
1954
1955 if (!self->events)
1956 return;
1957
1958 if (start) {
1959 if (!self->start_ns_event_obj)
1960 return;
1961 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001962 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001963 if (!parcel)
1964 return;
1965 Py_INCREF(action);
1966 } else {
1967 if (!self->end_ns_event_obj)
1968 return;
1969 action = self->end_ns_event_obj;
1970 Py_INCREF(action);
1971 parcel = Py_None;
1972 Py_INCREF(parcel);
1973 }
1974
1975 res = PyTuple_New(2);
1976
1977 if (res) {
1978 PyTuple_SET_ITEM(res, 0, action);
1979 PyTuple_SET_ITEM(res, 1, parcel);
1980 PyList_Append(self->events, res);
1981 Py_DECREF(res);
1982 } else
1983 PyErr_Clear(); /* FIXME: propagate error */
1984}
1985
1986/* -------------------------------------------------------------------- */
1987/* methods (in alphabetical order) */
1988
1989static PyObject*
1990treebuilder_data(TreeBuilderObject* self, PyObject* args)
1991{
1992 PyObject* data;
1993 if (!PyArg_ParseTuple(args, "O:data", &data))
1994 return NULL;
1995
1996 return treebuilder_handle_data(self, data);
1997}
1998
1999static PyObject*
2000treebuilder_end(TreeBuilderObject* self, PyObject* args)
2001{
2002 PyObject* tag;
2003 if (!PyArg_ParseTuple(args, "O:end", &tag))
2004 return NULL;
2005
2006 return treebuilder_handle_end(self, tag);
2007}
2008
2009LOCAL(PyObject*)
2010treebuilder_done(TreeBuilderObject* self)
2011{
2012 PyObject* res;
2013
2014 /* FIXME: check stack size? */
2015
2016 if (self->root)
2017 res = self->root;
2018 else
2019 res = Py_None;
2020
2021 Py_INCREF(res);
2022 return res;
2023}
2024
2025static PyObject*
2026treebuilder_close(TreeBuilderObject* self, PyObject* args)
2027{
2028 if (!PyArg_ParseTuple(args, ":close"))
2029 return NULL;
2030
2031 return treebuilder_done(self);
2032}
2033
2034static PyObject*
2035treebuilder_start(TreeBuilderObject* self, PyObject* args)
2036{
2037 PyObject* tag;
2038 PyObject* attrib = Py_None;
2039 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2040 return NULL;
2041
2042 return treebuilder_handle_start(self, tag, attrib);
2043}
2044
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002045static PyMethodDef treebuilder_methods[] = {
2046 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2047 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2048 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002049 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2050 {NULL, NULL}
2051};
2052
Neal Norwitz227b5332006-03-22 09:28:35 +00002053static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002054 PyVarObject_HEAD_INIT(NULL, 0)
2055 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002056 /* methods */
2057 (destructor)treebuilder_dealloc, /* tp_dealloc */
2058 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002059 0, /* tp_getattr */
2060 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002061 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002062 0, /* tp_repr */
2063 0, /* tp_as_number */
2064 0, /* tp_as_sequence */
2065 0, /* tp_as_mapping */
2066 0, /* tp_hash */
2067 0, /* tp_call */
2068 0, /* tp_str */
2069 0, /* tp_getattro */
2070 0, /* tp_setattro */
2071 0, /* tp_as_buffer */
2072 Py_TPFLAGS_DEFAULT, /* tp_flags */
2073 0, /* tp_doc */
2074 0, /* tp_traverse */
2075 0, /* tp_clear */
2076 0, /* tp_richcompare */
2077 0, /* tp_weaklistoffset */
2078 0, /* tp_iter */
2079 0, /* tp_iternext */
2080 treebuilder_methods, /* tp_methods */
2081 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002082};
2083
2084/* ==================================================================== */
2085/* the expat interface */
2086
2087#if defined(USE_EXPAT)
2088
2089#include "expat.h"
2090
2091#if defined(USE_PYEXPAT_CAPI)
2092#include "pyexpat.h"
2093static struct PyExpat_CAPI* expat_capi;
2094#define EXPAT(func) (expat_capi->func)
2095#else
2096#define EXPAT(func) (XML_##func)
2097#endif
2098
2099typedef struct {
2100 PyObject_HEAD
2101
2102 XML_Parser parser;
2103
2104 PyObject* target;
2105 PyObject* entity;
2106
2107 PyObject* names;
2108
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002109 PyObject* handle_start;
2110 PyObject* handle_data;
2111 PyObject* handle_end;
2112
2113 PyObject* handle_comment;
2114 PyObject* handle_pi;
2115
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002116 PyObject* handle_close;
2117
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002118} XMLParserObject;
2119
Neal Norwitz227b5332006-03-22 09:28:35 +00002120static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002121
2122/* helpers */
2123
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002124LOCAL(PyObject*)
2125makeuniversal(XMLParserObject* self, const char* string)
2126{
2127 /* convert a UTF-8 tag/attribute name from the expat parser
2128 to a universal name string */
2129
2130 int size = strlen(string);
2131 PyObject* key;
2132 PyObject* value;
2133
2134 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002135 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002136 if (!key)
2137 return NULL;
2138
2139 value = PyDict_GetItem(self->names, key);
2140
2141 if (value) {
2142 Py_INCREF(value);
2143 } else {
2144 /* new name. convert to universal name, and decode as
2145 necessary */
2146
2147 PyObject* tag;
2148 char* p;
2149 int i;
2150
2151 /* look for namespace separator */
2152 for (i = 0; i < size; i++)
2153 if (string[i] == '}')
2154 break;
2155 if (i != size) {
2156 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002157 tag = PyBytes_FromStringAndSize(NULL, size+1);
2158 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002159 p[0] = '{';
2160 memcpy(p+1, string, size);
2161 size++;
2162 } else {
2163 /* plain name; use key as tag */
2164 Py_INCREF(key);
2165 tag = key;
2166 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002169 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002170 value = PyUnicode_DecodeUTF8(p, size, "strict");
2171 Py_DECREF(tag);
2172 if (!value) {
2173 Py_DECREF(key);
2174 return NULL;
2175 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176
2177 /* add to names dictionary */
2178 if (PyDict_SetItem(self->names, key, value) < 0) {
2179 Py_DECREF(key);
2180 Py_DECREF(value);
2181 return NULL;
2182 }
2183 }
2184
2185 Py_DECREF(key);
2186 return value;
2187}
2188
Eli Bendersky5b77d812012-03-16 08:20:05 +02002189/* Set the ParseError exception with the given parameters.
2190 * If message is not NULL, it's used as the error string. Otherwise, the
2191 * message string is the default for the given error_code.
2192*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002193static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002194expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002195{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002196 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002197
Victor Stinner499dfcf2011-03-21 13:26:24 +01002198 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002199 message ? message : EXPAT(ErrorString)(error_code),
2200 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002201 if (errmsg == NULL)
2202 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002203
Victor Stinner499dfcf2011-03-21 13:26:24 +01002204 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2205 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002206 if (!error)
2207 return;
2208
Eli Bendersky5b77d812012-03-16 08:20:05 +02002209 /* Add code and position attributes */
2210 code = PyLong_FromLong((long)error_code);
2211 if (!code) {
2212 Py_DECREF(error);
2213 return;
2214 }
2215 if (PyObject_SetAttrString(error, "code", code) == -1) {
2216 Py_DECREF(error);
2217 Py_DECREF(code);
2218 return;
2219 }
2220 Py_DECREF(code);
2221
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002222 position = Py_BuildValue("(ii)", line, column);
2223 if (!position) {
2224 Py_DECREF(error);
2225 return;
2226 }
2227 if (PyObject_SetAttrString(error, "position", position) == -1) {
2228 Py_DECREF(error);
2229 Py_DECREF(position);
2230 return;
2231 }
2232 Py_DECREF(position);
2233
2234 PyErr_SetObject(elementtree_parseerror_obj, error);
2235 Py_DECREF(error);
2236}
2237
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238/* -------------------------------------------------------------------- */
2239/* handlers */
2240
2241static void
2242expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2243 int data_len)
2244{
2245 PyObject* key;
2246 PyObject* value;
2247 PyObject* res;
2248
2249 if (data_len < 2 || data_in[0] != '&')
2250 return;
2251
Neal Norwitz0269b912007-08-08 06:56:02 +00002252 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002253 if (!key)
2254 return;
2255
2256 value = PyDict_GetItem(self->entity, key);
2257
2258 if (value) {
2259 if (TreeBuilder_CheckExact(self->target))
2260 res = treebuilder_handle_data(
2261 (TreeBuilderObject*) self->target, value
2262 );
2263 else if (self->handle_data)
2264 res = PyObject_CallFunction(self->handle_data, "O", value);
2265 else
2266 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002268 } else if (!PyErr_Occurred()) {
2269 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002270 char message[128] = "undefined entity ";
2271 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002272 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002273 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002274 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002275 EXPAT(GetErrorColumnNumber)(self->parser),
2276 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 );
2278 }
2279
2280 Py_DECREF(key);
2281}
2282
2283static void
2284expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2285 const XML_Char **attrib_in)
2286{
2287 PyObject* res;
2288 PyObject* tag;
2289 PyObject* attrib;
2290 int ok;
2291
2292 /* tag name */
2293 tag = makeuniversal(self, tag_in);
2294 if (!tag)
2295 return; /* parser will look for errors */
2296
2297 /* attributes */
2298 if (attrib_in[0]) {
2299 attrib = PyDict_New();
2300 if (!attrib)
2301 return;
2302 while (attrib_in[0] && attrib_in[1]) {
2303 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002304 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305 if (!key || !value) {
2306 Py_XDECREF(value);
2307 Py_XDECREF(key);
2308 Py_DECREF(attrib);
2309 return;
2310 }
2311 ok = PyDict_SetItem(attrib, key, value);
2312 Py_DECREF(value);
2313 Py_DECREF(key);
2314 if (ok < 0) {
2315 Py_DECREF(attrib);
2316 return;
2317 }
2318 attrib_in += 2;
2319 }
2320 } else {
2321 Py_INCREF(Py_None);
2322 attrib = Py_None;
2323 }
2324
2325 if (TreeBuilder_CheckExact(self->target))
2326 /* shortcut */
2327 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2328 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002329 else if (self->handle_start) {
2330 if (attrib == Py_None) {
2331 Py_DECREF(attrib);
2332 attrib = PyDict_New();
2333 if (!attrib)
2334 return;
2335 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002337 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338 res = NULL;
2339
2340 Py_DECREF(tag);
2341 Py_DECREF(attrib);
2342
2343 Py_XDECREF(res);
2344}
2345
2346static void
2347expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2348 int data_len)
2349{
2350 PyObject* data;
2351 PyObject* res;
2352
Neal Norwitz0269b912007-08-08 06:56:02 +00002353 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002354 if (!data)
2355 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356
2357 if (TreeBuilder_CheckExact(self->target))
2358 /* shortcut */
2359 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2360 else if (self->handle_data)
2361 res = PyObject_CallFunction(self->handle_data, "O", data);
2362 else
2363 res = NULL;
2364
2365 Py_DECREF(data);
2366
2367 Py_XDECREF(res);
2368}
2369
2370static void
2371expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2372{
2373 PyObject* tag;
2374 PyObject* res = NULL;
2375
2376 if (TreeBuilder_CheckExact(self->target))
2377 /* shortcut */
2378 /* the standard tree builder doesn't look at the end tag */
2379 res = treebuilder_handle_end(
2380 (TreeBuilderObject*) self->target, Py_None
2381 );
2382 else if (self->handle_end) {
2383 tag = makeuniversal(self, tag_in);
2384 if (tag) {
2385 res = PyObject_CallFunction(self->handle_end, "O", tag);
2386 Py_DECREF(tag);
2387 }
2388 }
2389
2390 Py_XDECREF(res);
2391}
2392
2393static void
2394expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2395 const XML_Char *uri)
2396{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002397 PyObject* sprefix = NULL;
2398 PyObject* suri = NULL;
2399
2400 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2401 if (!suri)
2402 return;
2403
2404 if (prefix)
2405 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2406 else
2407 sprefix = PyUnicode_FromString("");
2408 if (!sprefix) {
2409 Py_DECREF(suri);
2410 return;
2411 }
2412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002414 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002416
2417 Py_DECREF(sprefix);
2418 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419}
2420
2421static void
2422expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2423{
2424 treebuilder_handle_namespace(
2425 (TreeBuilderObject*) self->target, 0, NULL, NULL
2426 );
2427}
2428
2429static void
2430expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2431{
2432 PyObject* comment;
2433 PyObject* res;
2434
2435 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002436 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 if (comment) {
2438 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2439 Py_XDECREF(res);
2440 Py_DECREF(comment);
2441 }
2442 }
2443}
2444
2445static void
2446expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2447 const XML_Char* data_in)
2448{
2449 PyObject* target;
2450 PyObject* data;
2451 PyObject* res;
2452
2453 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002454 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2455 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456 if (target && data) {
2457 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2458 Py_XDECREF(res);
2459 Py_DECREF(data);
2460 Py_DECREF(target);
2461 } else {
2462 Py_XDECREF(data);
2463 Py_XDECREF(target);
2464 }
2465 }
2466}
2467
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468static int
2469expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2470 XML_Encoding *info)
2471{
2472 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473 unsigned char s[256];
2474 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002475 void *data;
2476 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477
2478 memset(info, 0, sizeof(XML_Encoding));
2479
2480 for (i = 0; i < 256; i++)
2481 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002482
Fredrik Lundhc3389992005-12-25 11:40:19 +00002483 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 if (!u)
2485 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002486 if (PyUnicode_READY(u))
2487 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002489 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 Py_DECREF(u);
2491 return XML_STATUS_ERROR;
2492 }
2493
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002494 kind = PyUnicode_KIND(u);
2495 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002497 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2498 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2499 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002501 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 }
2503
2504 Py_DECREF(u);
2505
2506 return XML_STATUS_OK;
2507}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
2509/* -------------------------------------------------------------------- */
2510/* constructor and destructor */
2511
2512static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002513xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514{
2515 XMLParserObject* self;
2516 /* FIXME: does this need to be static? */
2517 static XML_Memory_Handling_Suite memory_handler;
2518
2519 PyObject* target = NULL;
2520 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002521 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2523 &target, &encoding))
2524 return NULL;
2525
2526#if defined(USE_PYEXPAT_CAPI)
2527 if (!expat_capi) {
2528 PyErr_SetString(
2529 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2530 );
2531 return NULL;
2532 }
2533#endif
2534
2535 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2536 if (self == NULL)
2537 return NULL;
2538
2539 self->entity = PyDict_New();
2540 if (!self->entity) {
2541 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002544
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 self->names = PyDict_New();
2546 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002547 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 }
2551
2552 memory_handler.malloc_fcn = PyObject_Malloc;
2553 memory_handler.realloc_fcn = PyObject_Realloc;
2554 memory_handler.free_fcn = PyObject_Free;
2555
2556 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2557 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002558 PyObject_Del(self->names);
2559 PyObject_Del(self->entity);
2560 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564
2565 /* setup target handlers */
2566 if (!target) {
2567 target = treebuilder_new();
2568 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002569 EXPAT(ParserFree)(self->parser);
2570 PyObject_Del(self->names);
2571 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002573 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 }
2575 } else
2576 Py_INCREF(target);
2577 self->target = target;
2578
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579 self->handle_start = PyObject_GetAttrString(target, "start");
2580 self->handle_data = PyObject_GetAttrString(target, "data");
2581 self->handle_end = PyObject_GetAttrString(target, "end");
2582 self->handle_comment = PyObject_GetAttrString(target, "comment");
2583 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002584 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585
2586 PyErr_Clear();
2587
2588 /* configure parser */
2589 EXPAT(SetUserData)(self->parser, self);
2590 EXPAT(SetElementHandler)(
2591 self->parser,
2592 (XML_StartElementHandler) expat_start_handler,
2593 (XML_EndElementHandler) expat_end_handler
2594 );
2595 EXPAT(SetDefaultHandlerExpand)(
2596 self->parser,
2597 (XML_DefaultHandler) expat_default_handler
2598 );
2599 EXPAT(SetCharacterDataHandler)(
2600 self->parser,
2601 (XML_CharacterDataHandler) expat_data_handler
2602 );
2603 if (self->handle_comment)
2604 EXPAT(SetCommentHandler)(
2605 self->parser,
2606 (XML_CommentHandler) expat_comment_handler
2607 );
2608 if (self->handle_pi)
2609 EXPAT(SetProcessingInstructionHandler)(
2610 self->parser,
2611 (XML_ProcessingInstructionHandler) expat_pi_handler
2612 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613 EXPAT(SetUnknownEncodingHandler)(
2614 self->parser,
2615 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2616 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617
2618 ALLOC(sizeof(XMLParserObject), "create expatparser");
2619
2620 return (PyObject*) self;
2621}
2622
2623static void
2624xmlparser_dealloc(XMLParserObject* self)
2625{
2626 EXPAT(ParserFree)(self->parser);
2627
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002628 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 Py_XDECREF(self->handle_pi);
2630 Py_XDECREF(self->handle_comment);
2631 Py_XDECREF(self->handle_end);
2632 Py_XDECREF(self->handle_data);
2633 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634
2635 Py_DECREF(self->target);
2636 Py_DECREF(self->entity);
2637 Py_DECREF(self->names);
2638
2639 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2640
2641 PyObject_Del(self);
2642}
2643
2644/* -------------------------------------------------------------------- */
2645/* methods (in alphabetical order) */
2646
2647LOCAL(PyObject*)
2648expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2649{
2650 int ok;
2651
2652 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2653
2654 if (PyErr_Occurred())
2655 return NULL;
2656
2657 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002658 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002659 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002660 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002661 EXPAT(GetErrorColumnNumber)(self->parser),
2662 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 );
2664 return NULL;
2665 }
2666
2667 Py_RETURN_NONE;
2668}
2669
2670static PyObject*
2671xmlparser_close(XMLParserObject* self, PyObject* args)
2672{
2673 /* end feeding data to parser */
2674
2675 PyObject* res;
2676 if (!PyArg_ParseTuple(args, ":close"))
2677 return NULL;
2678
2679 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002680 if (!res)
2681 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002683 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684 Py_DECREF(res);
2685 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002686 } if (self->handle_close) {
2687 Py_DECREF(res);
2688 return PyObject_CallFunction(self->handle_close, "");
2689 } else
2690 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691}
2692
2693static PyObject*
2694xmlparser_feed(XMLParserObject* self, PyObject* args)
2695{
2696 /* feed data to parser */
2697
2698 char* data;
2699 int data_len;
2700 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2701 return NULL;
2702
2703 return expat_parse(self, data, data_len, 0);
2704}
2705
2706static PyObject*
2707xmlparser_parse(XMLParserObject* self, PyObject* args)
2708{
2709 /* (internal) parse until end of input stream */
2710
2711 PyObject* reader;
2712 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002713 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714 PyObject* res;
2715
2716 PyObject* fileobj;
2717 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2718 return NULL;
2719
2720 reader = PyObject_GetAttrString(fileobj, "read");
2721 if (!reader)
2722 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724 /* read from open file object */
2725 for (;;) {
2726
2727 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2728
2729 if (!buffer) {
2730 /* read failed (e.g. due to KeyboardInterrupt) */
2731 Py_DECREF(reader);
2732 return NULL;
2733 }
2734
Eli Benderskyf996e772012-03-16 05:53:30 +02002735 if (PyUnicode_CheckExact(buffer)) {
2736 /* A unicode object is encoded into bytes using UTF-8 */
2737 if (PyUnicode_GET_SIZE(buffer) == 0) {
2738 Py_DECREF(buffer);
2739 break;
2740 }
2741 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2742 if (!temp) {
2743 /* Propagate exception from PyUnicode_AsEncodedString */
2744 Py_DECREF(buffer);
2745 Py_DECREF(reader);
2746 return NULL;
2747 }
2748
2749 /* Here we no longer need the original buffer since it contains
2750 * unicode. Make it point to the encoded bytes object.
2751 */
2752 Py_DECREF(buffer);
2753 buffer = temp;
2754 }
2755 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 Py_DECREF(buffer);
2757 break;
2758 }
2759
2760 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002761 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 );
2763
2764 Py_DECREF(buffer);
2765
2766 if (!res) {
2767 Py_DECREF(reader);
2768 return NULL;
2769 }
2770 Py_DECREF(res);
2771
2772 }
2773
2774 Py_DECREF(reader);
2775
2776 res = expat_parse(self, "", 0, 1);
2777
2778 if (res && TreeBuilder_CheckExact(self->target)) {
2779 Py_DECREF(res);
2780 return treebuilder_done((TreeBuilderObject*) self->target);
2781 }
2782
2783 return res;
2784}
2785
2786static PyObject*
2787xmlparser_setevents(XMLParserObject* self, PyObject* args)
2788{
2789 /* activate element event reporting */
2790
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002791 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792 TreeBuilderObject* target;
2793
2794 PyObject* events; /* event collector */
2795 PyObject* event_set = Py_None;
2796 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2797 &event_set))
2798 return NULL;
2799
2800 if (!TreeBuilder_CheckExact(self->target)) {
2801 PyErr_SetString(
2802 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002803 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804 "targets"
2805 );
2806 return NULL;
2807 }
2808
2809 target = (TreeBuilderObject*) self->target;
2810
2811 Py_INCREF(events);
2812 Py_XDECREF(target->events);
2813 target->events = events;
2814
2815 /* clear out existing events */
2816 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2817 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2818 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2819 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2820
2821 if (event_set == Py_None) {
2822 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 Py_RETURN_NONE;
2825 }
2826
2827 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2828 goto error;
2829
2830 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2831 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2832 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833 if (PyUnicode_Check(item)) {
2834 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002835 if (event == NULL)
2836 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002837 } else if (PyBytes_Check(item))
2838 event = PyBytes_AS_STRING(item);
2839 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002841 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842 if (strcmp(event, "start") == 0) {
2843 Py_INCREF(item);
2844 target->start_event_obj = item;
2845 } else if (strcmp(event, "end") == 0) {
2846 Py_INCREF(item);
2847 Py_XDECREF(target->end_event_obj);
2848 target->end_event_obj = item;
2849 } else if (strcmp(event, "start-ns") == 0) {
2850 Py_INCREF(item);
2851 Py_XDECREF(target->start_ns_event_obj);
2852 target->start_ns_event_obj = item;
2853 EXPAT(SetNamespaceDeclHandler)(
2854 self->parser,
2855 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2856 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2857 );
2858 } else if (strcmp(event, "end-ns") == 0) {
2859 Py_INCREF(item);
2860 Py_XDECREF(target->end_ns_event_obj);
2861 target->end_ns_event_obj = item;
2862 EXPAT(SetNamespaceDeclHandler)(
2863 self->parser,
2864 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2865 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2866 );
2867 } else {
2868 PyErr_Format(
2869 PyExc_ValueError,
2870 "unknown event '%s'", event
2871 );
2872 return NULL;
2873 }
2874 }
2875
2876 Py_RETURN_NONE;
2877
2878 error:
2879 PyErr_SetString(
2880 PyExc_TypeError,
2881 "invalid event tuple"
2882 );
2883 return NULL;
2884}
2885
2886static PyMethodDef xmlparser_methods[] = {
2887 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2888 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2889 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2890 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2891 {NULL, NULL}
2892};
2893
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002894static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002895xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896{
Alexander Belopolskye239d232010-12-08 23:31:48 +00002897 if (PyUnicode_Check(nameobj)) {
2898 PyObject* res;
2899 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
2900 res = self->entity;
2901 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
2902 res = self->target;
2903 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
2904 return PyUnicode_FromFormat(
2905 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00002907 }
2908 else
2909 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910
Alexander Belopolskye239d232010-12-08 23:31:48 +00002911 Py_INCREF(res);
2912 return res;
2913 }
2914 generic:
2915 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916}
2917
Neal Norwitz227b5332006-03-22 09:28:35 +00002918static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002919 PyVarObject_HEAD_INIT(NULL, 0)
2920 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 /* methods */
2922 (destructor)xmlparser_dealloc, /* tp_dealloc */
2923 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002924 0, /* tp_getattr */
2925 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002926 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002927 0, /* tp_repr */
2928 0, /* tp_as_number */
2929 0, /* tp_as_sequence */
2930 0, /* tp_as_mapping */
2931 0, /* tp_hash */
2932 0, /* tp_call */
2933 0, /* tp_str */
2934 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2935 0, /* tp_setattro */
2936 0, /* tp_as_buffer */
2937 Py_TPFLAGS_DEFAULT, /* tp_flags */
2938 0, /* tp_doc */
2939 0, /* tp_traverse */
2940 0, /* tp_clear */
2941 0, /* tp_richcompare */
2942 0, /* tp_weaklistoffset */
2943 0, /* tp_iter */
2944 0, /* tp_iternext */
2945 xmlparser_methods, /* tp_methods */
2946 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947};
2948
2949#endif
2950
2951/* ==================================================================== */
2952/* python module interface */
2953
2954static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2956 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2957#if defined(USE_EXPAT)
2958 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959#endif
2960 {NULL, NULL}
2961};
2962
Martin v. Löwis1a214512008-06-11 05:26:20 +00002963
2964static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002965 PyModuleDef_HEAD_INIT,
2966 "_elementtree",
2967 NULL,
2968 -1,
2969 _functions,
2970 NULL,
2971 NULL,
2972 NULL,
2973 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002974};
2975
Neal Norwitzf6657e62006-12-28 04:47:50 +00002976PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002977PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978{
2979 PyObject* m;
2980 PyObject* g;
2981 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002983 /* Initialize object types */
2984 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002985 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002986 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002987 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002989 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00002990 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991#endif
2992
Martin v. Löwis1a214512008-06-11 05:26:20 +00002993 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002994 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002995 return NULL;
2996
2997 /* The code below requires that the module gets already added
2998 to sys.modules. */
2999 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003000 _elementtreemodule.m_name,
3001 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002
3003 /* python glue code */
3004
3005 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003006 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003007 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008
3009 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3010
3011 bootstrap = (
3012
Florent Xiclunaf4bdf4e2012-02-11 11:28:16 +01003013 "from copy import deepcopy\n"
3014 "from xml.etree import ElementPath\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003016 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 " if tag == '*':\n"
3018 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 " if tag is None or node.tag == tag:\n"
3020 " yield node\n"
3021 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003022 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003024
3025 "def itertext(node):\n" /* helper */
3026 " if node.text:\n"
3027 " yield node.text\n"
3028 " for e in node:\n"
3029 " for s in e.itertext():\n"
3030 " yield s\n"
3031 " if e.tail:\n"
3032 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 );
3035
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003036 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3037 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038
3039 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003041 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3042 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043
3044#if defined(USE_PYEXPAT_CAPI)
3045 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003046 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3047 if (expat_capi) {
3048 /* check that it's usable */
3049 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3050 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3051 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3052 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3053 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3054 expat_capi = NULL;
3055 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003059 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003060 );
3061 Py_INCREF(elementtree_parseerror_obj);
3062 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3063
Eli Bendersky092af1f2012-03-04 07:14:03 +02003064 Py_INCREF((PyObject *)&Element_Type);
3065 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3066
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003067 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068}