blob: 0c8abcf901068dd4f7e18111595a713617ecec6b [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
65/* For best performance, choose a value so that 80-90% of all nodes
66 have no more than the given number of children. Set this to zero
67 to minimize the size of the element structure itself (this only
68 helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  The "#if 0" branch, when enabled, keeps a
   running byte counter in 'memory' and prints it together with the given
   comment on every ALLOC/RELEASE.  As written that branch is disabled, so
   both macros expand to nothing. */
77#if 0
78static int memory = 0;
79#define ALLOC(size, comment)\
80do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
81#define RELEASE(size, comment)\
82do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
83#else
/* tracing disabled: the hooks compile away */
84#define ALLOC(size, comment)
85#define RELEASE(size, comment)
86#endif
87
88/* compiler tweaks */
/* LOCAL(type) introduces a file-private helper returning 'type'.  Under
   MSVC the helper is additionally declared __inline and __fastcall; all
   other compilers get a plain 'static' function. */
89#if defined(_MSC_VER)
90#define LOCAL(type) static __inline type __fastcall
91#else
92#define LOCAL(type) static type
93#endif
94
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000095/* macros used to store 'join' flags in string object pointers. note
96 that all use of text and tail as object pointers must be wrapped in
97 JOIN_OBJ. see comments in the ElementObject definition for more
98 info. */
/* JOIN_GET(p): the join flag, i.e. the lowest bit of the tagged pointer. */
99#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET(p, flag): strip any existing flag from p (via JOIN_OBJ) and OR
   in the given flag; the result is a tagged pointer, not a usable object
   pointer. */
100#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ(p): the real PyObject pointer, with the lowest bit masked off. */
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +0200101#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
/* NOTE(review): not referenced in the part of the file reviewed here;
   presumably the ParseError exception object -- confirm against the init
   function. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
/* callable invoked by deepcopy() with (object, memo); deepcopy() raises
   RuntimeError while this is still NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
/* object whose "find" method element_find() calls for path expressions
   and for lookups that pass a namespaces argument */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
/* forward declaration of the Element type object, which is referenced by
   the helpers below */
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
/* true only when op's type is exactly Element_Type; instances of
   subclasses do not match */
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817LOCAL(int)
818checkpath(PyObject* tag)
819{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000820 Py_ssize_t i;
821 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822
823 /* check if a tag contains an xpath character */
824
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000825#define PATHCHAR(ch) \
826 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200829 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
830 void *data = PyUnicode_DATA(tag);
831 unsigned int kind = PyUnicode_KIND(tag);
832 for (i = 0; i < len; i++) {
833 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
834 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200836 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200838 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 return 1;
840 }
841 return 0;
842 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 if (PyBytes_Check(tag)) {
844 char *p = PyBytes_AS_STRING(tag);
845 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 if (p[i] == '{')
847 check = 0;
848 else if (p[i] == '}')
849 check = 1;
850 else if (check && PATHCHAR(p[i]))
851 return 1;
852 }
853 return 0;
854 }
855
856 return 1; /* unknown type; might be path expression */
857}
858
859static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860element_extend(ElementObject* self, PyObject* args)
861{
862 PyObject* seq;
863 Py_ssize_t i, seqlen = 0;
864
865 PyObject* seq_in;
866 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
867 return NULL;
868
869 seq = PySequence_Fast(seq_in, "");
870 if (!seq) {
871 PyErr_Format(
872 PyExc_TypeError,
873 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
874 );
875 return NULL;
876 }
877
878 seqlen = PySequence_Size(seq);
879 for (i = 0; i < seqlen; i++) {
880 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200881 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
882 Py_DECREF(seq);
883 PyErr_Format(
884 PyExc_TypeError,
885 "expected an Element, not \"%.200s\"",
886 Py_TYPE(element)->tp_name);
887 return NULL;
888 }
889
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 if (element_add_subelement(self, element) < 0) {
891 Py_DECREF(seq);
892 return NULL;
893 }
894 }
895
896 Py_DECREF(seq);
897
898 Py_RETURN_NONE;
899}
900
901static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300902element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903{
904 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000906 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300907 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200908
Eli Bendersky737b1732012-05-29 06:02:56 +0300909 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
910 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000911 return NULL;
912
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200913 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200914 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200915 return _PyObject_CallMethodId(
916 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200918 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000919
920 if (!self->extra)
921 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100922
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000923 for (i = 0; i < self->extra->length; i++) {
924 PyObject* item = self->extra->children[i];
925 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000926 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 Py_INCREF(item);
928 return item;
929 }
930 }
931
932 Py_RETURN_NONE;
933}
934
935static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300936element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937{
938 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 PyObject* tag;
940 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000941 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200942 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300943 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200944
Eli Bendersky737b1732012-05-29 06:02:56 +0300945 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
946 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 return NULL;
948
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000949 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200950 return _PyObject_CallMethodId(
951 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952 );
953
954 if (!self->extra) {
955 Py_INCREF(default_value);
956 return default_value;
957 }
958
959 for (i = 0; i < self->extra->length; i++) {
960 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000961 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
962
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 PyObject* text = element_get_text(item);
964 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000965 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000966 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 return text;
968 }
969 }
970
971 Py_INCREF(default_value);
972 return default_value;
973}
974
975static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300976element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977{
978 int i;
979 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000981 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300982 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983
Eli Bendersky737b1732012-05-29 06:02:56 +0300984 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
985 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986 return NULL;
987
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200988 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200989 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200990 return _PyObject_CallMethodId(
991 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200993 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994
995 out = PyList_New(0);
996 if (!out)
997 return NULL;
998
999 if (!self->extra)
1000 return out;
1001
1002 for (i = 0; i < self->extra->length; i++) {
1003 PyObject* item = self->extra->children[i];
1004 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001005 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 if (PyList_Append(out, item) < 0) {
1007 Py_DECREF(out);
1008 return NULL;
1009 }
1010 }
1011 }
1012
1013 return out;
1014}
1015
1016static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001017element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001018{
1019 PyObject* tag;
1020 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001021 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001022 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023
Eli Bendersky737b1732012-05-29 06:02:56 +03001024 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1025 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001026 return NULL;
1027
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001028 return _PyObject_CallMethodId(
1029 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001030 );
1031}
1032
1033static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034element_get(ElementObject* self, PyObject* args)
1035{
1036 PyObject* value;
1037
1038 PyObject* key;
1039 PyObject* default_value = Py_None;
1040 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1041 return NULL;
1042
1043 if (!self->extra || self->extra->attrib == Py_None)
1044 value = default_value;
1045 else {
1046 value = PyDict_GetItem(self->extra->attrib, key);
1047 if (!value)
1048 value = default_value;
1049 }
1050
1051 Py_INCREF(value);
1052 return value;
1053}
1054
1055static PyObject*
1056element_getchildren(ElementObject* self, PyObject* args)
1057{
1058 int i;
1059 PyObject* list;
1060
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001061 /* FIXME: report as deprecated? */
1062
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001063 if (!PyArg_ParseTuple(args, ":getchildren"))
1064 return NULL;
1065
1066 if (!self->extra)
1067 return PyList_New(0);
1068
1069 list = PyList_New(self->extra->length);
1070 if (!list)
1071 return NULL;
1072
1073 for (i = 0; i < self->extra->length; i++) {
1074 PyObject* item = self->extra->children[i];
1075 Py_INCREF(item);
1076 PyList_SET_ITEM(list, i, item);
1077 }
1078
1079 return list;
1080}
1081
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001082
Eli Bendersky64d11e62012-06-15 07:42:50 +03001083static PyObject *
1084create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1085
1086
1087static PyObject *
1088element_iter(ElementObject *self, PyObject *args)
1089{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return NULL;
1093
Eli Bendersky64d11e62012-06-15 07:42:50 +03001094 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095}
1096
1097
1098static PyObject*
1099element_itertext(ElementObject* self, PyObject* args)
1100{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001101 if (!PyArg_ParseTuple(args, ":itertext"))
1102 return NULL;
1103
Eli Bendersky64d11e62012-06-15 07:42:50 +03001104 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105}
1106
Eli Bendersky64d11e62012-06-15 07:42:50 +03001107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001109element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001110{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001111 ElementObject* self = (ElementObject*) self_;
1112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113 if (!self->extra || index < 0 || index >= self->extra->length) {
1114 PyErr_SetString(
1115 PyExc_IndexError,
1116 "child index out of range"
1117 );
1118 return NULL;
1119 }
1120
1121 Py_INCREF(self->extra->children[index]);
1122 return self->extra->children[index];
1123}
1124
1125static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126element_insert(ElementObject* self, PyObject* args)
1127{
1128 int i;
1129
1130 int index;
1131 PyObject* element;
1132 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1133 &Element_Type, &element))
1134 return NULL;
1135
1136 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001137 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139 if (index < 0) {
1140 index += self->extra->length;
1141 if (index < 0)
1142 index = 0;
1143 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144 if (index > self->extra->length)
1145 index = self->extra->length;
1146
1147 if (element_resize(self, 1) < 0)
1148 return NULL;
1149
1150 for (i = self->extra->length; i > index; i--)
1151 self->extra->children[i] = self->extra->children[i-1];
1152
1153 Py_INCREF(element);
1154 self->extra->children[index] = element;
1155
1156 self->extra->length++;
1157
1158 Py_RETURN_NONE;
1159}
1160
1161static PyObject*
1162element_items(ElementObject* self, PyObject* args)
1163{
1164 if (!PyArg_ParseTuple(args, ":items"))
1165 return NULL;
1166
1167 if (!self->extra || self->extra->attrib == Py_None)
1168 return PyList_New(0);
1169
1170 return PyDict_Items(self->extra->attrib);
1171}
1172
1173static PyObject*
1174element_keys(ElementObject* self, PyObject* args)
1175{
1176 if (!PyArg_ParseTuple(args, ":keys"))
1177 return NULL;
1178
1179 if (!self->extra || self->extra->attrib == Py_None)
1180 return PyList_New(0);
1181
1182 return PyDict_Keys(self->extra->attrib);
1183}
1184
Martin v. Löwis18e16552006-02-15 17:27:45 +00001185static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186element_length(ElementObject* self)
1187{
1188 if (!self->extra)
1189 return 0;
1190
1191 return self->extra->length;
1192}
1193
1194static PyObject*
1195element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1196{
1197 PyObject* elem;
1198
1199 PyObject* tag;
1200 PyObject* attrib;
1201 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1202 return NULL;
1203
1204 attrib = PyDict_Copy(attrib);
1205 if (!attrib)
1206 return NULL;
1207
Eli Bendersky092af1f2012-03-04 07:14:03 +02001208 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209
1210 Py_DECREF(attrib);
1211
1212 return elem;
1213}
1214
1215static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216element_remove(ElementObject* self, PyObject* args)
1217{
1218 int i;
1219
1220 PyObject* element;
1221 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1222 return NULL;
1223
1224 if (!self->extra) {
1225 /* element has no children, so raise exception */
1226 PyErr_SetString(
1227 PyExc_ValueError,
1228 "list.remove(x): x not in list"
1229 );
1230 return NULL;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
1234 if (self->extra->children[i] == element)
1235 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001236 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 break;
1238 }
1239
1240 if (i == self->extra->length) {
1241 /* element is not in children, so raise exception */
1242 PyErr_SetString(
1243 PyExc_ValueError,
1244 "list.remove(x): x not in list"
1245 );
1246 return NULL;
1247 }
1248
1249 Py_DECREF(self->extra->children[i]);
1250
1251 self->extra->length--;
1252
1253 for (; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255
1256 Py_RETURN_NONE;
1257}
1258
1259static PyObject*
1260element_repr(ElementObject* self)
1261{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001262 if (self->tag)
1263 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1264 else
1265 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001266}
1267
1268static PyObject*
1269element_set(ElementObject* self, PyObject* args)
1270{
1271 PyObject* attrib;
1272
1273 PyObject* key;
1274 PyObject* value;
1275 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1276 return NULL;
1277
1278 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001279 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280
1281 attrib = element_get_attrib(self);
1282 if (!attrib)
1283 return NULL;
1284
1285 if (PyDict_SetItem(attrib, key, value) < 0)
1286 return NULL;
1287
1288 Py_RETURN_NONE;
1289}
1290
1291static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001292element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001294 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295 int i;
1296 PyObject* old;
1297
1298 if (!self->extra || index < 0 || index >= self->extra->length) {
1299 PyErr_SetString(
1300 PyExc_IndexError,
1301 "child assignment index out of range");
1302 return -1;
1303 }
1304
1305 old = self->extra->children[index];
1306
1307 if (item) {
1308 Py_INCREF(item);
1309 self->extra->children[index] = item;
1310 } else {
1311 self->extra->length--;
1312 for (i = index; i < self->extra->length; i++)
1313 self->extra->children[i] = self->extra->children[i+1];
1314 }
1315
1316 Py_DECREF(old);
1317
1318 return 0;
1319}
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321static PyObject*
1322element_subscr(PyObject* self_, PyObject* item)
1323{
1324 ElementObject* self = (ElementObject*) self_;
1325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326 if (PyIndex_Check(item)) {
1327 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328
1329 if (i == -1 && PyErr_Occurred()) {
1330 return NULL;
1331 }
1332 if (i < 0 && self->extra)
1333 i += self->extra->length;
1334 return element_getitem(self_, i);
1335 }
1336 else if (PySlice_Check(item)) {
1337 Py_ssize_t start, stop, step, slicelen, cur, i;
1338 PyObject* list;
1339
1340 if (!self->extra)
1341 return PyList_New(0);
1342
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001343 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001344 self->extra->length,
1345 &start, &stop, &step, &slicelen) < 0) {
1346 return NULL;
1347 }
1348
1349 if (slicelen <= 0)
1350 return PyList_New(0);
1351 else {
1352 list = PyList_New(slicelen);
1353 if (!list)
1354 return NULL;
1355
1356 for (cur = start, i = 0; i < slicelen;
1357 cur += step, i++) {
1358 PyObject* item = self->extra->children[cur];
1359 Py_INCREF(item);
1360 PyList_SET_ITEM(list, i, item);
1361 }
1362
1363 return list;
1364 }
1365 }
1366 else {
1367 PyErr_SetString(PyExc_TypeError,
1368 "element indices must be integers");
1369 return NULL;
1370 }
1371}
1372
1373static int
1374element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1375{
1376 ElementObject* self = (ElementObject*) self_;
1377
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001378 if (PyIndex_Check(item)) {
1379 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001380
1381 if (i == -1 && PyErr_Occurred()) {
1382 return -1;
1383 }
1384 if (i < 0 && self->extra)
1385 i += self->extra->length;
1386 return element_setitem(self_, i, value);
1387 }
1388 else if (PySlice_Check(item)) {
1389 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1390
1391 PyObject* recycle = NULL;
1392 PyObject* seq = NULL;
1393
1394 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001395 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001397 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398 self->extra->length,
1399 &start, &stop, &step, &slicelen) < 0) {
1400 return -1;
1401 }
1402
Eli Bendersky865756a2012-03-09 13:38:15 +02001403 if (value == NULL) {
1404 /* Delete slice */
1405 size_t cur;
1406 Py_ssize_t i;
1407
1408 if (slicelen <= 0)
1409 return 0;
1410
1411 /* Since we're deleting, the direction of the range doesn't matter,
1412 * so for simplicity make it always ascending.
1413 */
1414 if (step < 0) {
1415 stop = start + 1;
1416 start = stop + step * (slicelen - 1) - 1;
1417 step = -step;
1418 }
1419
1420 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1421
1422 /* recycle is a list that will contain all the children
1423 * scheduled for removal.
1424 */
1425 if (!(recycle = PyList_New(slicelen))) {
1426 PyErr_NoMemory();
1427 return -1;
1428 }
1429
1430 /* This loop walks over all the children that have to be deleted,
1431 * with cur pointing at them. num_moved is the amount of children
1432 * until the next deleted child that have to be "shifted down" to
1433 * occupy the deleted's places.
1434 * Note that in the ith iteration, shifting is done i+i places down
1435 * because i children were already removed.
1436 */
1437 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1438 /* Compute how many children have to be moved, clipping at the
1439 * list end.
1440 */
1441 Py_ssize_t num_moved = step - 1;
1442 if (cur + step >= (size_t)self->extra->length) {
1443 num_moved = self->extra->length - cur - 1;
1444 }
1445
1446 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1447
1448 memmove(
1449 self->extra->children + cur - i,
1450 self->extra->children + cur + 1,
1451 num_moved * sizeof(PyObject *));
1452 }
1453
1454 /* Leftover "tail" after the last removed child */
1455 cur = start + (size_t)slicelen * step;
1456 if (cur < (size_t)self->extra->length) {
1457 memmove(
1458 self->extra->children + cur - slicelen,
1459 self->extra->children + cur,
1460 (self->extra->length - cur) * sizeof(PyObject *));
1461 }
1462
1463 self->extra->length -= slicelen;
1464
1465 /* Discard the recycle list with all the deleted sub-elements */
1466 Py_XDECREF(recycle);
1467 return 0;
1468 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001469 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001470 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001471 seq = PySequence_Fast(value, "");
1472 if (!seq) {
1473 PyErr_Format(
1474 PyExc_TypeError,
1475 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1476 );
1477 return -1;
1478 }
1479 newlen = PySequence_Size(seq);
1480 }
1481
1482 if (step != 1 && newlen != slicelen)
1483 {
1484 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001485 "attempt to assign sequence of size %zd "
1486 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001487 newlen, slicelen
1488 );
1489 return -1;
1490 }
1491
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492 /* Resize before creating the recycle bin, to prevent refleaks. */
1493 if (newlen > slicelen) {
1494 if (element_resize(self, newlen - slicelen) < 0) {
1495 if (seq) {
1496 Py_DECREF(seq);
1497 }
1498 return -1;
1499 }
1500 }
1501
1502 if (slicelen > 0) {
1503 /* to avoid recursive calls to this method (via decref), move
1504 old items to the recycle bin here, and get rid of them when
1505 we're done modifying the element */
1506 recycle = PyList_New(slicelen);
1507 if (!recycle) {
1508 if (seq) {
1509 Py_DECREF(seq);
1510 }
1511 return -1;
1512 }
1513 for (cur = start, i = 0; i < slicelen;
1514 cur += step, i++)
1515 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1516 }
1517
1518 if (newlen < slicelen) {
1519 /* delete slice */
1520 for (i = stop; i < self->extra->length; i++)
1521 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1522 } else if (newlen > slicelen) {
1523 /* insert slice */
1524 for (i = self->extra->length-1; i >= stop; i--)
1525 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1526 }
1527
1528 /* replace the slice */
1529 for (cur = start, i = 0; i < newlen;
1530 cur += step, i++) {
1531 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1532 Py_INCREF(element);
1533 self->extra->children[cur] = element;
1534 }
1535
1536 self->extra->length += newlen - slicelen;
1537
1538 if (seq) {
1539 Py_DECREF(seq);
1540 }
1541
1542 /* discard the recycle bin, and everything in it */
1543 Py_XDECREF(recycle);
1544
1545 return 0;
1546 }
1547 else {
1548 PyErr_SetString(PyExc_TypeError,
1549 "element indices must be integers");
1550 return -1;
1551 }
1552}
1553
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554static PyMethodDef element_methods[] = {
1555
Eli Bendersky0192ba32012-03-30 16:38:33 +03001556 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557
1558 {"get", (PyCFunction) element_get, METH_VARARGS},
1559 {"set", (PyCFunction) element_set, METH_VARARGS},
1560
Eli Bendersky737b1732012-05-29 06:02:56 +03001561 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1562 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1563 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
1565 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001566 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1568 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1569
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001570 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1571 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001572 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573
1574 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1576
1577 {"items", (PyCFunction) element_items, METH_VARARGS},
1578 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1579
1580 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1581
1582 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1583 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001584 {"__sizeof__", element_sizeof, METH_NOARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 {NULL, NULL}
1587};
1588
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001590element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591{
1592 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001593 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001595 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001596 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001597
Alexander Belopolskye239d232010-12-08 23:31:48 +00001598 if (name == NULL)
1599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001601 /* handle common attributes first */
1602 if (strcmp(name, "tag") == 0) {
1603 res = self->tag;
1604 Py_INCREF(res);
1605 return res;
1606 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001608 Py_INCREF(res);
1609 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 }
1611
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001612 /* methods */
1613 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1614 if (res)
1615 return res;
1616
1617 /* less common attributes */
1618 if (strcmp(name, "tail") == 0) {
1619 PyErr_Clear();
1620 res = element_get_tail(self);
1621 } else if (strcmp(name, "attrib") == 0) {
1622 PyErr_Clear();
1623 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001624 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001625 res = element_get_attrib(self);
1626 }
1627
1628 if (!res)
1629 return NULL;
1630
1631 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 return res;
1633}
1634
Eli Benderskyb20df952012-05-20 06:33:29 +03001635static PyObject*
1636element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637{
Eli Benderskyb20df952012-05-20 06:33:29 +03001638 char *name = "";
1639 if (PyUnicode_Check(nameobj))
1640 name = _PyUnicode_AsString(nameobj);
1641
1642 if (name == NULL)
1643 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644
1645 if (strcmp(name, "tag") == 0) {
1646 Py_DECREF(self->tag);
1647 self->tag = value;
1648 Py_INCREF(self->tag);
1649 } else if (strcmp(name, "text") == 0) {
1650 Py_DECREF(JOIN_OBJ(self->text));
1651 self->text = value;
1652 Py_INCREF(self->text);
1653 } else if (strcmp(name, "tail") == 0) {
1654 Py_DECREF(JOIN_OBJ(self->tail));
1655 self->tail = value;
1656 Py_INCREF(self->tail);
1657 } else if (strcmp(name, "attrib") == 0) {
1658 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001659 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001660 Py_DECREF(self->extra->attrib);
1661 self->extra->attrib = value;
1662 Py_INCREF(self->extra->attrib);
1663 } else {
1664 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001665 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666 }
1667
Eli Benderskyb20df952012-05-20 06:33:29 +03001668 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669}
1670
1671static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 0, /* sq_concat */
1674 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001675 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001677 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001678 0,
1679};
1680
1681static PyMappingMethods element_as_mapping = {
1682 (lenfunc) element_length,
1683 (binaryfunc) element_subscr,
1684 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685};
1686
Neal Norwitz227b5332006-03-22 09:28:35 +00001687static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001688 PyVarObject_HEAD_INIT(NULL, 0)
1689 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001691 (destructor)element_dealloc, /* tp_dealloc */
1692 0, /* tp_print */
1693 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001694 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001695 0, /* tp_reserved */
1696 (reprfunc)element_repr, /* tp_repr */
1697 0, /* tp_as_number */
1698 &element_as_sequence, /* tp_as_sequence */
1699 &element_as_mapping, /* tp_as_mapping */
1700 0, /* tp_hash */
1701 0, /* tp_call */
1702 0, /* tp_str */
1703 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001704 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001705 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001706 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1707 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001708 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001709 (traverseproc)element_gc_traverse, /* tp_traverse */
1710 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001711 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001712 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001713 0, /* tp_iter */
1714 0, /* tp_iternext */
1715 element_methods, /* tp_methods */
1716 0, /* tp_members */
1717 0, /* tp_getset */
1718 0, /* tp_base */
1719 0, /* tp_dict */
1720 0, /* tp_descr_get */
1721 0, /* tp_descr_set */
1722 0, /* tp_dictoffset */
1723 (initproc)element_init, /* tp_init */
1724 PyType_GenericAlloc, /* tp_alloc */
1725 element_new, /* tp_new */
1726 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727};
1728
Eli Bendersky64d11e62012-06-15 07:42:50 +03001729/******************************* Element iterator ****************************/
1730
1731/* ElementIterObject represents the iteration state over an XML element in
1732 * pre-order traversal. To keep track of which sub-element should be returned
1733 * next, a stack of parents is maintained. This is a standard stack-based
1734 * iterative pre-order traversal of a tree.
1735 * The stack is managed using a single-linked list starting at parent_stack.
1736 * Each stack node contains the saved parent to which we should return after
1737 * the current one is exhausted, and the next child to examine in that parent.
1738 */
1739typedef struct ParentLocator_t {
1740 ElementObject *parent;
1741 Py_ssize_t child_index;
1742 struct ParentLocator_t *next;
1743} ParentLocator;
1744
1745typedef struct {
1746 PyObject_HEAD
1747 ParentLocator *parent_stack;
1748 ElementObject *root_element;
1749 PyObject *sought_tag;
1750 int root_done;
1751 int gettext;
1752} ElementIterObject;
1753
1754
1755static void
1756elementiter_dealloc(ElementIterObject *it)
1757{
1758 ParentLocator *p = it->parent_stack;
1759 while (p) {
1760 ParentLocator *temp = p;
1761 Py_XDECREF(p->parent);
1762 p = p->next;
1763 PyObject_Free(temp);
1764 }
1765
1766 Py_XDECREF(it->sought_tag);
1767 Py_XDECREF(it->root_element);
1768
1769 PyObject_GC_UnTrack(it);
1770 PyObject_GC_Del(it);
1771}
1772
1773static int
1774elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1775{
1776 ParentLocator *p = it->parent_stack;
1777 while (p) {
1778 Py_VISIT(p->parent);
1779 p = p->next;
1780 }
1781
1782 Py_VISIT(it->root_element);
1783 Py_VISIT(it->sought_tag);
1784 return 0;
1785}
1786
1787/* Helper function for elementiter_next. Add a new parent to the parent stack.
1788 */
1789static ParentLocator *
1790parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1791{
1792 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1793 if (new_node) {
1794 new_node->parent = parent;
1795 Py_INCREF(parent);
1796 new_node->child_index = 0;
1797 new_node->next = stack;
1798 }
1799 return new_node;
1800}
1801
1802static PyObject *
1803elementiter_next(ElementIterObject *it)
1804{
1805 /* Sub-element iterator.
1806 *
1807 * A short note on gettext: this function serves both the iter() and
1808 * itertext() methods to avoid code duplication. However, there are a few
1809 * small differences in the way these iterations work. Namely:
1810 * - itertext() only yields text from nodes that have it, and continues
1811 * iterating when a node doesn't have text (so it doesn't return any
1812 * node like iter())
1813 * - itertext() also has to handle tail, after finishing with all the
1814 * children of a node.
1815 */
Eli Bendersky113da642012-06-15 07:52:49 +03001816 ElementObject *cur_parent;
1817 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001818
1819 while (1) {
1820 /* Handle the case reached in the beginning and end of iteration, where
1821 * the parent stack is empty. The root_done flag gives us indication
1822 * whether we've just started iterating (so root_done is 0), in which
1823 * case the root is returned. If root_done is 1 and we're here, the
1824 * iterator is exhausted.
1825 */
1826 if (!it->parent_stack->parent) {
1827 if (it->root_done) {
1828 PyErr_SetNone(PyExc_StopIteration);
1829 return NULL;
1830 } else {
1831 it->parent_stack = parent_stack_push_new(it->parent_stack,
1832 it->root_element);
1833 if (!it->parent_stack) {
1834 PyErr_NoMemory();
1835 return NULL;
1836 }
1837
1838 it->root_done = 1;
1839 if (it->sought_tag == Py_None ||
1840 PyObject_RichCompareBool(it->root_element->tag,
1841 it->sought_tag, Py_EQ) == 1) {
1842 if (it->gettext) {
1843 PyObject *text = JOIN_OBJ(it->root_element->text);
1844 if (PyObject_IsTrue(text)) {
1845 Py_INCREF(text);
1846 return text;
1847 }
1848 } else {
1849 Py_INCREF(it->root_element);
1850 return (PyObject *)it->root_element;
1851 }
1852 }
1853 }
1854 }
1855
1856 /* See if there are children left to traverse in the current parent. If
1857 * yes, visit the next child. If not, pop the stack and try again.
1858 */
Eli Bendersky113da642012-06-15 07:52:49 +03001859 cur_parent = it->parent_stack->parent;
1860 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001861 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1862 ElementObject *child = (ElementObject *)
1863 cur_parent->extra->children[child_index];
1864 it->parent_stack->child_index++;
1865 it->parent_stack = parent_stack_push_new(it->parent_stack,
1866 child);
1867 if (!it->parent_stack) {
1868 PyErr_NoMemory();
1869 return NULL;
1870 }
1871
1872 if (it->gettext) {
1873 PyObject *text = JOIN_OBJ(child->text);
1874 if (PyObject_IsTrue(text)) {
1875 Py_INCREF(text);
1876 return text;
1877 }
1878 } else if (it->sought_tag == Py_None ||
1879 PyObject_RichCompareBool(child->tag,
1880 it->sought_tag, Py_EQ) == 1) {
1881 Py_INCREF(child);
1882 return (PyObject *)child;
1883 }
1884 else
1885 continue;
1886 }
1887 else {
1888 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1889 ParentLocator *next = it->parent_stack->next;
1890 Py_XDECREF(it->parent_stack->parent);
1891 PyObject_Free(it->parent_stack);
1892 it->parent_stack = next;
1893
1894 /* Note that extra condition on it->parent_stack->parent here;
1895 * this is because itertext() is supposed to only return *inner*
1896 * text, not text following the element it began iteration with.
1897 */
1898 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1899 Py_INCREF(tail);
1900 return tail;
1901 }
1902 }
1903 }
1904
1905 return NULL;
1906}
1907
1908
/* Type object for the element iterator. It is a GC-enabled type whose
 * instances are their own iterator (tp_iter is PyObject_SelfIter) and are
 * advanced by elementiter_next(). tp_new is left empty here; in the visible
 * code instances are created by create_elementiter().
 */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
1950
1951
1952static PyObject *
1953create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1954{
1955 ElementIterObject *it;
1956 PyObject *star = NULL;
1957
1958 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
1959 if (!it)
1960 return NULL;
1961 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
1962 PyObject_GC_Del(it);
1963 return NULL;
1964 }
1965
1966 it->parent_stack->parent = NULL;
1967 it->parent_stack->child_index = 0;
1968 it->parent_stack->next = NULL;
1969
1970 if (PyUnicode_Check(tag))
1971 star = PyUnicode_FromString("*");
1972 else if (PyBytes_Check(tag))
1973 star = PyBytes_FromString("*");
1974
1975 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
1976 tag = Py_None;
1977
1978 Py_XDECREF(star);
1979 it->sought_tag = tag;
1980 it->root_done = 0;
1981 it->gettext = gettext;
1982 it->root_element = self;
1983
1984 Py_INCREF(self);
1985 Py_INCREF(tag);
1986
1987 PyObject_GC_Track(it);
1988 return (PyObject *)it;
1989}
1990
1991
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001992/* ==================================================================== */
1993/* the tree builder type */
1994
/* State of a tree builder: the tree under construction, the pending
 * character data, the stack of open elements and the optional event
 * recording configuration.
 */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* callable invoked as element_factory(tag, attrib) to create nodes;
       NULL means the built-in element type is used */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

static PyTypeObject TreeBuilder_Type;

/* True only for instances of exactly TreeBuilder_Type (not subclasses). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002021
2022/* -------------------------------------------------------------------- */
2023/* constructor and destructor */
2024
Eli Bendersky58d548d2012-05-29 15:45:16 +03002025static PyObject *
2026treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002027{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002028 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2029 if (t != NULL) {
2030 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002031
Eli Bendersky58d548d2012-05-29 15:45:16 +03002032 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002033 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002034 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002035 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036
Eli Bendersky58d548d2012-05-29 15:45:16 +03002037 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002038 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002039 t->stack = PyList_New(20);
2040 if (!t->stack) {
2041 Py_DECREF(t->this);
2042 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002043 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002044 return NULL;
2045 }
2046 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002047
Eli Bendersky58d548d2012-05-29 15:45:16 +03002048 t->events = NULL;
2049 t->start_event_obj = t->end_event_obj = NULL;
2050 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2051 }
2052 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002053}
2054
Eli Bendersky58d548d2012-05-29 15:45:16 +03002055static int
2056treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002057{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002058 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002059 PyObject *element_factory = NULL;
2060 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002061 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002062
2063 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2064 &element_factory)) {
2065 return -1;
2066 }
2067
2068 if (element_factory) {
2069 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002070 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002071 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002072 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002073 }
2074
Eli Bendersky58d548d2012-05-29 15:45:16 +03002075 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002076}
2077
Eli Bendersky48d358b2012-05-30 17:57:50 +03002078static int
2079treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2080{
2081 Py_VISIT(self->root);
2082 Py_VISIT(self->this);
2083 Py_VISIT(self->last);
2084 Py_VISIT(self->data);
2085 Py_VISIT(self->stack);
2086 Py_VISIT(self->element_factory);
2087 return 0;
2088}
2089
/* tp_clear for TreeBuilder: drops every object reference held by the
 * builder, leaving each field NULL. Also used by treebuilder_dealloc() to
 * release the fields. Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event recording state */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    /* tree construction state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002106
Eli Bendersky48d358b2012-05-30 17:57:50 +03002107static void
2108treebuilder_dealloc(TreeBuilderObject *self)
2109{
2110 PyObject_GC_UnTrack(self);
2111 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002112 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002113}
2114
2115/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002116/* helpers for handling of arbitrary element-like objects */
2117
2118static int
2119treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2120 PyObject **dest, _Py_Identifier *name)
2121{
2122 if (Element_CheckExact(element)) {
2123 Py_DECREF(JOIN_OBJ(*dest));
2124 *dest = JOIN_SET(data, PyList_CheckExact(data));
2125 return 0;
2126 }
2127 else {
2128 PyObject *joined = list_join(data);
2129 int r;
2130 if (joined == NULL)
2131 return -1;
2132 r = _PyObject_SetAttrId(element, name, joined);
2133 Py_DECREF(joined);
2134 return r;
2135 }
2136}
2137
2138/* These two functions steal a reference to data */
2139static int
2140treebuilder_set_element_text(PyObject *element, PyObject *data)
2141{
2142 _Py_IDENTIFIER(text);
2143 return treebuilder_set_element_text_or_tail(
2144 element, data, &((ElementObject *) element)->text, &PyId_text);
2145}
2146
2147static int
2148treebuilder_set_element_tail(PyObject *element, PyObject *data)
2149{
2150 _Py_IDENTIFIER(tail);
2151 return treebuilder_set_element_text_or_tail(
2152 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2153}
2154
2155static int
2156treebuilder_add_subelement(PyObject *element, PyObject *child)
2157{
2158 _Py_IDENTIFIER(append);
2159 if (Element_CheckExact(element)) {
2160 ElementObject *elem = (ElementObject *) element;
2161 return element_add_subelement(elem, child);
2162 }
2163 else {
2164 PyObject *res;
2165 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2166 if (res == NULL)
2167 return -1;
2168 Py_DECREF(res);
2169 return 0;
2170 }
2171}
2172
2173/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002174/* handlers */
2175
/* Handle the opening of an element: flush pending character data, create the
 * node (through element_factory when one is set), attach it to the current
 * node or make it the root, push the previous current node on the stack and
 * optionally record a start event.
 *
 * Returns a new reference to the created node, or NULL with an exception
 * set.
 */
LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
                         PyObject* attrib)
{
    PyObject* node;
    PyObject* this;

    /* Flush collected data: when the current node is also the most recently
       created/closed one the data becomes its text, otherwise it becomes the
       tail of `last`. */
    if (self->data) {
        if (self->this == self->last) {
            if (treebuilder_set_element_text(self->last, self->data))
                return NULL;
        }
        else {
            if (treebuilder_set_element_tail(self->last, self->data))
                return NULL;
        }
        self->data = NULL;
    }

    if (self->element_factory) {
        node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
    } else {
        node = create_new_element(tag, attrib);
    }
    if (!node) {
        return NULL;
    }

    this = self->this;

    if (this != Py_None) {
        if (treebuilder_add_subelement(this, node) < 0)
            goto error;
    } else {
        /* no open element: node is a top-level element, only one allowed */
        if (self->root) {
            PyErr_SetString(
                elementtree_parseerror_obj,
                "multiple elements on top level"
                );
            goto error;
        }
        Py_INCREF(node);
        self->root = node;
    }

    /* Push the previous current node; reuse a pre-allocated slot while there
       is one, otherwise grow the list. */
    if (self->index < PyList_GET_SIZE(self->stack)) {
        if (PyList_SetItem(self->stack, self->index, this) < 0)
            goto error;
        /* PyList_SetItem stole a reference; give the list its own one */
        Py_INCREF(this);
    } else {
        if (PyList_Append(self->stack, this) < 0)
            goto error;
    }
    self->index++;

    /* the new node becomes both the current and the last node */
    Py_DECREF(this);
    Py_INCREF(node);
    self->this = node;

    Py_DECREF(self->last);
    Py_INCREF(node);
    self->last = node;

    if (self->start_event_obj) {
        PyObject* res;
        PyObject* action = self->start_event_obj;
        res = PyTuple_Pack(2, action, node);
        if (res) {
            /* the result of the append is not checked */
            PyList_Append(self->events, res);
            Py_DECREF(res);
        } else
            PyErr_Clear(); /* FIXME: propagate error */
    }

    return node;

  error:
    Py_DECREF(node);
    return NULL;
}
2256
2257LOCAL(PyObject*)
2258treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2259{
2260 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002261 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002262 /* ignore calls to data before the first call to start */
2263 Py_RETURN_NONE;
2264 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265 /* store the first item as is */
2266 Py_INCREF(data); self->data = data;
2267 } else {
2268 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002269 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2270 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002271 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272 /* expat often generates single character data sections; handle
2273 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002274 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2275 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002277 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278 } else if (PyList_CheckExact(self->data)) {
2279 if (PyList_Append(self->data, data) < 0)
2280 return NULL;
2281 } else {
2282 PyObject* list = PyList_New(2);
2283 if (!list)
2284 return NULL;
2285 PyList_SET_ITEM(list, 0, self->data);
2286 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2287 self->data = list;
2288 }
2289 }
2290
2291 Py_RETURN_NONE;
2292}
2293
/* Handle the closing of the current element: flush pending character data,
 * pop the parent from the stack so it becomes the current node again, and
 * optionally record an end event. `tag` is not used by this function.
 *
 * Returns a new reference to the element that was just closed, or NULL with
 * an exception set (IndexError when no element is open).
 */
LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
{
    PyObject* item;

    /* Flush collected data: text of the current node if it is also the last
       node, otherwise tail of `last`. */
    if (self->data) {
        if (self->this == self->last) {
            if (treebuilder_set_element_text(self->last, self->data))
                return NULL;
        } else {
            if (treebuilder_set_element_tail(self->last, self->data))
                return NULL;
        }
        self->data = NULL;
    }

    if (self->index == 0) {
        PyErr_SetString(
            PyExc_IndexError,
            "pop from empty stack"
            );
        return NULL;
    }

    self->index--;

    /* borrowed from the stack list; take our own reference for self->this */
    item = PyList_GET_ITEM(self->stack, self->index);
    Py_INCREF(item);

    Py_DECREF(self->last);

    /* `last` takes over the reference previously held by `this` */
    self->last = self->this;
    self->this = item;

    if (self->end_event_obj) {
        PyObject* res;
        PyObject* action = self->end_event_obj;
        PyObject* node = (PyObject*) self->last;
        res = PyTuple_Pack(2, action, node);
        if (res) {
            /* the result of the append is not checked */
            PyList_Append(self->events, res);
            Py_DECREF(res);
        } else
            PyErr_Clear(); /* FIXME: propagate error */
    }

    Py_INCREF(self->last);
    return (PyObject*) self->last;
}
2343
2344LOCAL(void)
2345treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002346 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347{
2348 PyObject* res;
2349 PyObject* action;
2350 PyObject* parcel;
2351
2352 if (!self->events)
2353 return;
2354
2355 if (start) {
2356 if (!self->start_ns_event_obj)
2357 return;
2358 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002359 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360 if (!parcel)
2361 return;
2362 Py_INCREF(action);
2363 } else {
2364 if (!self->end_ns_event_obj)
2365 return;
2366 action = self->end_ns_event_obj;
2367 Py_INCREF(action);
2368 parcel = Py_None;
2369 Py_INCREF(parcel);
2370 }
2371
2372 res = PyTuple_New(2);
2373
2374 if (res) {
2375 PyTuple_SET_ITEM(res, 0, action);
2376 PyTuple_SET_ITEM(res, 1, parcel);
2377 PyList_Append(self->events, res);
2378 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002379 }
2380 else {
2381 Py_DECREF(action);
2382 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002384 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385}
2386
2387/* -------------------------------------------------------------------- */
2388/* methods (in alphabetical order) */
2389
2390static PyObject*
2391treebuilder_data(TreeBuilderObject* self, PyObject* args)
2392{
2393 PyObject* data;
2394 if (!PyArg_ParseTuple(args, "O:data", &data))
2395 return NULL;
2396
2397 return treebuilder_handle_data(self, data);
2398}
2399
2400static PyObject*
2401treebuilder_end(TreeBuilderObject* self, PyObject* args)
2402{
2403 PyObject* tag;
2404 if (!PyArg_ParseTuple(args, "O:end", &tag))
2405 return NULL;
2406
2407 return treebuilder_handle_end(self, tag);
2408}
2409
2410LOCAL(PyObject*)
2411treebuilder_done(TreeBuilderObject* self)
2412{
2413 PyObject* res;
2414
2415 /* FIXME: check stack size? */
2416
2417 if (self->root)
2418 res = self->root;
2419 else
2420 res = Py_None;
2421
2422 Py_INCREF(res);
2423 return res;
2424}
2425
2426static PyObject*
2427treebuilder_close(TreeBuilderObject* self, PyObject* args)
2428{
2429 if (!PyArg_ParseTuple(args, ":close"))
2430 return NULL;
2431
2432 return treebuilder_done(self);
2433}
2434
2435static PyObject*
2436treebuilder_start(TreeBuilderObject* self, PyObject* args)
2437{
2438 PyObject* tag;
2439 PyObject* attrib = Py_None;
2440 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2441 return NULL;
2442
2443 return treebuilder_handle_start(self, tag, attrib);
2444}
2445
/* Method table for TreeBuilder objects; every method takes positional
 * arguments only (METH_VARARGS). The table ends with a NULL sentinel.
 */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}
};
2453
/* Type object for TreeBuilder: a subclassable (Py_TPFLAGS_BASETYPE),
 * GC-enabled type constructed through treebuilder_new()/treebuilder_init().
 */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
2495
2496/* ==================================================================== */
2497/* the expat interface */
2498
2499#if defined(USE_EXPAT)
2500
2501#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502#include "pyexpat.h"
/* Table of expat entry points; every expat call in this file goes through
 * it by way of the EXPAT() macro.
 * NOTE(review): where this pointer is initialized is not visible in this
 * part of the file -- presumably at module initialization; confirm.
 */
static struct PyExpat_CAPI *expat_capi;
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite that maps expat's malloc/realloc/free hooks onto the
 * PyObject_* allocator functions.
 */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2508
/* State of an XML parser object wrapping an expat parser. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;          /* underlying expat parser */

    PyObject *target;           /* object receiving the parse events; exact
                                   TreeBuilder instances are called directly */
    PyObject *entity;           /* mapping consulted for entity references by
                                   the default handler */

    PyObject *names;            /* dict caching raw (bytes) names -> decoded
                                   universal names, see makeuniversal() */

    /* Callables invoked for the corresponding parse events when the target
       is not an exact TreeBuilder; a NULL entry means the event is not
       forwarded. NOTE(review): presumably bound methods of the target --
       where they are set is not visible here; confirm. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;

static PyTypeObject XMLParser_Type;

/* True only for instances of exactly XMLParser_Type (not subclasses). */
#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2534
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535/* helpers */
2536
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537LOCAL(PyObject*)
2538makeuniversal(XMLParserObject* self, const char* string)
2539{
2540 /* convert a UTF-8 tag/attribute name from the expat parser
2541 to a universal name string */
2542
Antoine Pitrouc1948842012-10-01 23:40:37 +02002543 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 PyObject* key;
2545 PyObject* value;
2546
2547 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002548 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 if (!key)
2550 return NULL;
2551
2552 value = PyDict_GetItem(self->names, key);
2553
2554 if (value) {
2555 Py_INCREF(value);
2556 } else {
2557 /* new name. convert to universal name, and decode as
2558 necessary */
2559
2560 PyObject* tag;
2561 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002562 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563
2564 /* look for namespace separator */
2565 for (i = 0; i < size; i++)
2566 if (string[i] == '}')
2567 break;
2568 if (i != size) {
2569 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002570 tag = PyBytes_FromStringAndSize(NULL, size+1);
2571 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 p[0] = '{';
2573 memcpy(p+1, string, size);
2574 size++;
2575 } else {
2576 /* plain name; use key as tag */
2577 Py_INCREF(key);
2578 tag = key;
2579 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002580
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002582 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002583 value = PyUnicode_DecodeUTF8(p, size, "strict");
2584 Py_DECREF(tag);
2585 if (!value) {
2586 Py_DECREF(key);
2587 return NULL;
2588 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589
2590 /* add to names dictionary */
2591 if (PyDict_SetItem(self->names, key, value) < 0) {
2592 Py_DECREF(key);
2593 Py_DECREF(value);
2594 return NULL;
2595 }
2596 }
2597
2598 Py_DECREF(key);
2599 return value;
2600}
2601
Eli Bendersky5b77d812012-03-16 08:20:05 +02002602/* Set the ParseError exception with the given parameters.
2603 * If message is not NULL, it's used as the error string. Otherwise, the
2604 * message string is the default for the given error_code.
2605*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002606static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002607expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002608{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002609 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002610
Victor Stinner499dfcf2011-03-21 13:26:24 +01002611 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002612 message ? message : EXPAT(ErrorString)(error_code),
2613 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002614 if (errmsg == NULL)
2615 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002616
Victor Stinner499dfcf2011-03-21 13:26:24 +01002617 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2618 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002619 if (!error)
2620 return;
2621
Eli Bendersky5b77d812012-03-16 08:20:05 +02002622 /* Add code and position attributes */
2623 code = PyLong_FromLong((long)error_code);
2624 if (!code) {
2625 Py_DECREF(error);
2626 return;
2627 }
2628 if (PyObject_SetAttrString(error, "code", code) == -1) {
2629 Py_DECREF(error);
2630 Py_DECREF(code);
2631 return;
2632 }
2633 Py_DECREF(code);
2634
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002635 position = Py_BuildValue("(ii)", line, column);
2636 if (!position) {
2637 Py_DECREF(error);
2638 return;
2639 }
2640 if (PyObject_SetAttrString(error, "position", position) == -1) {
2641 Py_DECREF(error);
2642 Py_DECREF(position);
2643 return;
2644 }
2645 Py_DECREF(position);
2646
2647 PyErr_SetObject(elementtree_parseerror_obj, error);
2648 Py_DECREF(error);
2649}
2650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651/* -------------------------------------------------------------------- */
2652/* handlers */
2653
2654static void
2655expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2656 int data_len)
2657{
2658 PyObject* key;
2659 PyObject* value;
2660 PyObject* res;
2661
2662 if (data_len < 2 || data_in[0] != '&')
2663 return;
2664
Neal Norwitz0269b912007-08-08 06:56:02 +00002665 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 if (!key)
2667 return;
2668
2669 value = PyDict_GetItem(self->entity, key);
2670
2671 if (value) {
2672 if (TreeBuilder_CheckExact(self->target))
2673 res = treebuilder_handle_data(
2674 (TreeBuilderObject*) self->target, value
2675 );
2676 else if (self->handle_data)
2677 res = PyObject_CallFunction(self->handle_data, "O", value);
2678 else
2679 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002680 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002681 } else if (!PyErr_Occurred()) {
2682 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002683 char message[128] = "undefined entity ";
2684 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002685 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002686 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002688 EXPAT(GetErrorColumnNumber)(self->parser),
2689 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 );
2691 }
2692
2693 Py_DECREF(key);
2694}
2695
2696static void
2697expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2698 const XML_Char **attrib_in)
2699{
2700 PyObject* res;
2701 PyObject* tag;
2702 PyObject* attrib;
2703 int ok;
2704
2705 /* tag name */
2706 tag = makeuniversal(self, tag_in);
2707 if (!tag)
2708 return; /* parser will look for errors */
2709
2710 /* attributes */
2711 if (attrib_in[0]) {
2712 attrib = PyDict_New();
2713 if (!attrib)
2714 return;
2715 while (attrib_in[0] && attrib_in[1]) {
2716 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002717 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 if (!key || !value) {
2719 Py_XDECREF(value);
2720 Py_XDECREF(key);
2721 Py_DECREF(attrib);
2722 return;
2723 }
2724 ok = PyDict_SetItem(attrib, key, value);
2725 Py_DECREF(value);
2726 Py_DECREF(key);
2727 if (ok < 0) {
2728 Py_DECREF(attrib);
2729 return;
2730 }
2731 attrib_in += 2;
2732 }
2733 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002734 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002735 attrib = PyDict_New();
2736 if (!attrib)
2737 return;
2738 }
2739
2740 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741 /* shortcut */
2742 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2743 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002744 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002745 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002747 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748 res = NULL;
2749
2750 Py_DECREF(tag);
2751 Py_DECREF(attrib);
2752
2753 Py_XDECREF(res);
2754}
2755
2756static void
2757expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2758 int data_len)
2759{
2760 PyObject* data;
2761 PyObject* res;
2762
Neal Norwitz0269b912007-08-08 06:56:02 +00002763 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002764 if (!data)
2765 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
2767 if (TreeBuilder_CheckExact(self->target))
2768 /* shortcut */
2769 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2770 else if (self->handle_data)
2771 res = PyObject_CallFunction(self->handle_data, "O", data);
2772 else
2773 res = NULL;
2774
2775 Py_DECREF(data);
2776
2777 Py_XDECREF(res);
2778}
2779
2780static void
2781expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2782{
2783 PyObject* tag;
2784 PyObject* res = NULL;
2785
2786 if (TreeBuilder_CheckExact(self->target))
2787 /* shortcut */
2788 /* the standard tree builder doesn't look at the end tag */
2789 res = treebuilder_handle_end(
2790 (TreeBuilderObject*) self->target, Py_None
2791 );
2792 else if (self->handle_end) {
2793 tag = makeuniversal(self, tag_in);
2794 if (tag) {
2795 res = PyObject_CallFunction(self->handle_end, "O", tag);
2796 Py_DECREF(tag);
2797 }
2798 }
2799
2800 Py_XDECREF(res);
2801}
2802
2803static void
2804expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2805 const XML_Char *uri)
2806{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002807 PyObject* sprefix = NULL;
2808 PyObject* suri = NULL;
2809
2810 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2811 if (!suri)
2812 return;
2813
2814 if (prefix)
2815 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2816 else
2817 sprefix = PyUnicode_FromString("");
2818 if (!sprefix) {
2819 Py_DECREF(suri);
2820 return;
2821 }
2822
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826
2827 Py_DECREF(sprefix);
2828 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829}
2830
2831static void
2832expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2833{
2834 treebuilder_handle_namespace(
2835 (TreeBuilderObject*) self->target, 0, NULL, NULL
2836 );
2837}
2838
2839static void
2840expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2841{
2842 PyObject* comment;
2843 PyObject* res;
2844
2845 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002846 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847 if (comment) {
2848 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2849 Py_XDECREF(res);
2850 Py_DECREF(comment);
2851 }
2852 }
2853}
2854
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002855static void
2856expat_start_doctype_handler(XMLParserObject *self,
2857 const XML_Char *doctype_name,
2858 const XML_Char *sysid,
2859 const XML_Char *pubid,
2860 int has_internal_subset)
2861{
2862 PyObject *self_pyobj = (PyObject *)self;
2863 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2864 PyObject *parser_doctype = NULL;
2865 PyObject *res = NULL;
2866
2867 doctype_name_obj = makeuniversal(self, doctype_name);
2868 if (!doctype_name_obj)
2869 return;
2870
2871 if (sysid) {
2872 sysid_obj = makeuniversal(self, sysid);
2873 if (!sysid_obj) {
2874 Py_DECREF(doctype_name_obj);
2875 return;
2876 }
2877 } else {
2878 Py_INCREF(Py_None);
2879 sysid_obj = Py_None;
2880 }
2881
2882 if (pubid) {
2883 pubid_obj = makeuniversal(self, pubid);
2884 if (!pubid_obj) {
2885 Py_DECREF(doctype_name_obj);
2886 Py_DECREF(sysid_obj);
2887 return;
2888 }
2889 } else {
2890 Py_INCREF(Py_None);
2891 pubid_obj = Py_None;
2892 }
2893
2894 /* If the target has a handler for doctype, call it. */
2895 if (self->handle_doctype) {
2896 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2897 doctype_name_obj, pubid_obj, sysid_obj);
2898 Py_CLEAR(res);
2899 }
2900
2901 /* Now see if the parser itself has a doctype method. If yes and it's
2902 * a subclass, call it but warn about deprecation. If it's not a subclass
2903 * (i.e. vanilla XMLParser), do nothing.
2904 */
2905 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2906 if (parser_doctype) {
2907 if (!XMLParser_CheckExact(self_pyobj)) {
2908 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2909 "This method of XMLParser is deprecated. Define"
2910 " doctype() method on the TreeBuilder target.",
2911 1) < 0) {
2912 goto clear;
2913 }
2914 res = PyObject_CallFunction(parser_doctype, "OOO",
2915 doctype_name_obj, pubid_obj, sysid_obj);
2916 Py_CLEAR(res);
2917 }
2918 }
2919
2920clear:
2921 Py_XDECREF(parser_doctype);
2922 Py_DECREF(doctype_name_obj);
2923 Py_DECREF(pubid_obj);
2924 Py_DECREF(sysid_obj);
2925}
2926
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927static void
2928expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2929 const XML_Char* data_in)
2930{
2931 PyObject* target;
2932 PyObject* data;
2933 PyObject* res;
2934
2935 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002936 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2937 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 if (target && data) {
2939 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2940 Py_XDECREF(res);
2941 Py_DECREF(data);
2942 Py_DECREF(target);
2943 } else {
2944 Py_XDECREF(data);
2945 Py_XDECREF(target);
2946 }
2947 }
2948}
2949
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950static int
2951expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2952 XML_Encoding *info)
2953{
2954 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 unsigned char s[256];
2956 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002957 void *data;
2958 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959
2960 memset(info, 0, sizeof(XML_Encoding));
2961
2962 for (i = 0; i < 256; i++)
2963 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002964
Fredrik Lundhc3389992005-12-25 11:40:19 +00002965 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 if (!u)
2967 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002968 if (PyUnicode_READY(u))
2969 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002971 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 Py_DECREF(u);
2973 return XML_STATUS_ERROR;
2974 }
2975
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002976 kind = PyUnicode_KIND(u);
2977 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002979 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2980 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2981 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002983 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984 }
2985
2986 Py_DECREF(u);
2987
2988 return XML_STATUS_OK;
2989}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990
2991/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992
Eli Bendersky52467b12012-06-01 07:13:08 +03002993static PyObject *
2994xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995{
Eli Bendersky52467b12012-06-01 07:13:08 +03002996 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2997 if (self) {
2998 self->parser = NULL;
2999 self->target = self->entity = self->names = NULL;
3000 self->handle_start = self->handle_data = self->handle_end = NULL;
3001 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003002 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003004 return (PyObject *)self;
3005}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
Eli Bendersky52467b12012-06-01 07:13:08 +03003007static int
3008xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3009{
3010 XMLParserObject *self_xp = (XMLParserObject *)self;
3011 PyObject *target = NULL, *html = NULL;
3012 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003013 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014
Eli Bendersky52467b12012-06-01 07:13:08 +03003015 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3016 &html, &target, &encoding)) {
3017 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003019
Eli Bendersky52467b12012-06-01 07:13:08 +03003020 self_xp->entity = PyDict_New();
3021 if (!self_xp->entity)
3022 return -1;
3023
3024 self_xp->names = PyDict_New();
3025 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003026 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003027 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 }
3029
Eli Bendersky52467b12012-06-01 07:13:08 +03003030 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3031 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003032 Py_CLEAR(self_xp->entity);
3033 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003035 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 }
3037
Eli Bendersky52467b12012-06-01 07:13:08 +03003038 if (target) {
3039 Py_INCREF(target);
3040 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003041 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003043 Py_CLEAR(self_xp->entity);
3044 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003045 EXPAT(ParserFree)(self_xp->parser);
3046 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003048 }
3049 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050
Eli Bendersky52467b12012-06-01 07:13:08 +03003051 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3052 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3053 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3054 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3055 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3056 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003057 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058
3059 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003062 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003064 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065 (XML_StartElementHandler) expat_start_handler,
3066 (XML_EndElementHandler) expat_end_handler
3067 );
3068 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003069 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 (XML_DefaultHandler) expat_default_handler
3071 );
3072 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003073 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074 (XML_CharacterDataHandler) expat_data_handler
3075 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003076 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003077 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003078 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079 (XML_CommentHandler) expat_comment_handler
3080 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003081 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003083 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084 (XML_ProcessingInstructionHandler) expat_pi_handler
3085 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003086 EXPAT(SetStartDoctypeDeclHandler)(
3087 self_xp->parser,
3088 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3089 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003091 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3093 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094
Eli Bendersky52467b12012-06-01 07:13:08 +03003095 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096}
3097
Eli Bendersky52467b12012-06-01 07:13:08 +03003098static int
3099xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3100{
3101 Py_VISIT(self->handle_close);
3102 Py_VISIT(self->handle_pi);
3103 Py_VISIT(self->handle_comment);
3104 Py_VISIT(self->handle_end);
3105 Py_VISIT(self->handle_data);
3106 Py_VISIT(self->handle_start);
3107
3108 Py_VISIT(self->target);
3109 Py_VISIT(self->entity);
3110 Py_VISIT(self->names);
3111
3112 return 0;
3113}
3114
3115static int
3116xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117{
3118 EXPAT(ParserFree)(self->parser);
3119
Antoine Pitrouc1948842012-10-01 23:40:37 +02003120 Py_CLEAR(self->handle_close);
3121 Py_CLEAR(self->handle_pi);
3122 Py_CLEAR(self->handle_comment);
3123 Py_CLEAR(self->handle_end);
3124 Py_CLEAR(self->handle_data);
3125 Py_CLEAR(self->handle_start);
3126 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127
Antoine Pitrouc1948842012-10-01 23:40:37 +02003128 Py_CLEAR(self->target);
3129 Py_CLEAR(self->entity);
3130 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131
Eli Bendersky52467b12012-06-01 07:13:08 +03003132 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133}
3134
Eli Bendersky52467b12012-06-01 07:13:08 +03003135static void
3136xmlparser_dealloc(XMLParserObject* self)
3137{
3138 PyObject_GC_UnTrack(self);
3139 xmlparser_gc_clear(self);
3140 Py_TYPE(self)->tp_free((PyObject *)self);
3141}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003142
3143LOCAL(PyObject*)
3144expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3145{
3146 int ok;
3147
3148 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3149
3150 if (PyErr_Occurred())
3151 return NULL;
3152
3153 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003154 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003155 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003157 EXPAT(GetErrorColumnNumber)(self->parser),
3158 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159 );
3160 return NULL;
3161 }
3162
3163 Py_RETURN_NONE;
3164}
3165
3166static PyObject*
3167xmlparser_close(XMLParserObject* self, PyObject* args)
3168{
3169 /* end feeding data to parser */
3170
3171 PyObject* res;
3172 if (!PyArg_ParseTuple(args, ":close"))
3173 return NULL;
3174
3175 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003176 if (!res)
3177 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003178
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003179 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003180 Py_DECREF(res);
3181 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003182 } if (self->handle_close) {
3183 Py_DECREF(res);
3184 return PyObject_CallFunction(self->handle_close, "");
3185 } else
3186 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187}
3188
3189static PyObject*
3190xmlparser_feed(XMLParserObject* self, PyObject* args)
3191{
3192 /* feed data to parser */
3193
3194 char* data;
3195 int data_len;
3196 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3197 return NULL;
3198
3199 return expat_parse(self, data, data_len, 0);
3200}
3201
3202static PyObject*
3203xmlparser_parse(XMLParserObject* self, PyObject* args)
3204{
3205 /* (internal) parse until end of input stream */
3206
3207 PyObject* reader;
3208 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003209 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210 PyObject* res;
3211
3212 PyObject* fileobj;
3213 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3214 return NULL;
3215
3216 reader = PyObject_GetAttrString(fileobj, "read");
3217 if (!reader)
3218 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220 /* read from open file object */
3221 for (;;) {
3222
3223 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3224
3225 if (!buffer) {
3226 /* read failed (e.g. due to KeyboardInterrupt) */
3227 Py_DECREF(reader);
3228 return NULL;
3229 }
3230
Eli Benderskyf996e772012-03-16 05:53:30 +02003231 if (PyUnicode_CheckExact(buffer)) {
3232 /* A unicode object is encoded into bytes using UTF-8 */
3233 if (PyUnicode_GET_SIZE(buffer) == 0) {
3234 Py_DECREF(buffer);
3235 break;
3236 }
3237 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003238 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003239 if (!temp) {
3240 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003241 Py_DECREF(reader);
3242 return NULL;
3243 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003244 buffer = temp;
3245 }
3246 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 Py_DECREF(buffer);
3248 break;
3249 }
3250
3251 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003252 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 );
3254
3255 Py_DECREF(buffer);
3256
3257 if (!res) {
3258 Py_DECREF(reader);
3259 return NULL;
3260 }
3261 Py_DECREF(res);
3262
3263 }
3264
3265 Py_DECREF(reader);
3266
3267 res = expat_parse(self, "", 0, 1);
3268
3269 if (res && TreeBuilder_CheckExact(self->target)) {
3270 Py_DECREF(res);
3271 return treebuilder_done((TreeBuilderObject*) self->target);
3272 }
3273
3274 return res;
3275}
3276
3277static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003278xmlparser_doctype(XMLParserObject *self, PyObject *args)
3279{
3280 Py_RETURN_NONE;
3281}
3282
3283static PyObject*
3284xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285{
3286 /* activate element event reporting */
3287
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003288 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 TreeBuilderObject* target;
3290
3291 PyObject* events; /* event collector */
3292 PyObject* event_set = Py_None;
3293 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3294 &event_set))
3295 return NULL;
3296
3297 if (!TreeBuilder_CheckExact(self->target)) {
3298 PyErr_SetString(
3299 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003300 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 "targets"
3302 );
3303 return NULL;
3304 }
3305
3306 target = (TreeBuilderObject*) self->target;
3307
3308 Py_INCREF(events);
3309 Py_XDECREF(target->events);
3310 target->events = events;
3311
3312 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003313 Py_CLEAR(target->start_event_obj);
3314 Py_CLEAR(target->end_event_obj);
3315 Py_CLEAR(target->start_ns_event_obj);
3316 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317
3318 if (event_set == Py_None) {
3319 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003320 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 Py_RETURN_NONE;
3322 }
3323
3324 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3325 goto error;
3326
3327 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3328 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3329 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003330 if (PyUnicode_Check(item)) {
3331 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003332 if (event == NULL)
3333 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003334 } else if (PyBytes_Check(item))
3335 event = PyBytes_AS_STRING(item);
3336 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003338 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 if (strcmp(event, "start") == 0) {
3340 Py_INCREF(item);
3341 target->start_event_obj = item;
3342 } else if (strcmp(event, "end") == 0) {
3343 Py_INCREF(item);
3344 Py_XDECREF(target->end_event_obj);
3345 target->end_event_obj = item;
3346 } else if (strcmp(event, "start-ns") == 0) {
3347 Py_INCREF(item);
3348 Py_XDECREF(target->start_ns_event_obj);
3349 target->start_ns_event_obj = item;
3350 EXPAT(SetNamespaceDeclHandler)(
3351 self->parser,
3352 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3353 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3354 );
3355 } else if (strcmp(event, "end-ns") == 0) {
3356 Py_INCREF(item);
3357 Py_XDECREF(target->end_ns_event_obj);
3358 target->end_ns_event_obj = item;
3359 EXPAT(SetNamespaceDeclHandler)(
3360 self->parser,
3361 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3362 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3363 );
3364 } else {
3365 PyErr_Format(
3366 PyExc_ValueError,
3367 "unknown event '%s'", event
3368 );
3369 return NULL;
3370 }
3371 }
3372
3373 Py_RETURN_NONE;
3374
3375 error:
3376 PyErr_SetString(
3377 PyExc_TypeError,
3378 "invalid event tuple"
3379 );
3380 return NULL;
3381}
3382
3383static PyMethodDef xmlparser_methods[] = {
3384 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3385 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3386 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3387 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003388 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 {NULL, NULL}
3390};
3391
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003392static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003393xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003395 if (PyUnicode_Check(nameobj)) {
3396 PyObject* res;
3397 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3398 res = self->entity;
3399 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3400 res = self->target;
3401 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3402 return PyUnicode_FromFormat(
3403 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003405 }
3406 else
3407 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408
Alexander Belopolskye239d232010-12-08 23:31:48 +00003409 Py_INCREF(res);
3410 return res;
3411 }
3412 generic:
3413 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414}
3415
Neal Norwitz227b5332006-03-22 09:28:35 +00003416static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003417 PyVarObject_HEAD_INIT(NULL, 0)
3418 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003420 (destructor)xmlparser_dealloc, /* tp_dealloc */
3421 0, /* tp_print */
3422 0, /* tp_getattr */
3423 0, /* tp_setattr */
3424 0, /* tp_reserved */
3425 0, /* tp_repr */
3426 0, /* tp_as_number */
3427 0, /* tp_as_sequence */
3428 0, /* tp_as_mapping */
3429 0, /* tp_hash */
3430 0, /* tp_call */
3431 0, /* tp_str */
3432 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3433 0, /* tp_setattro */
3434 0, /* tp_as_buffer */
3435 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3436 /* tp_flags */
3437 0, /* tp_doc */
3438 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3439 (inquiry)xmlparser_gc_clear, /* tp_clear */
3440 0, /* tp_richcompare */
3441 0, /* tp_weaklistoffset */
3442 0, /* tp_iter */
3443 0, /* tp_iternext */
3444 xmlparser_methods, /* tp_methods */
3445 0, /* tp_members */
3446 0, /* tp_getset */
3447 0, /* tp_base */
3448 0, /* tp_dict */
3449 0, /* tp_descr_get */
3450 0, /* tp_descr_set */
3451 0, /* tp_dictoffset */
3452 (initproc)xmlparser_init, /* tp_init */
3453 PyType_GenericAlloc, /* tp_alloc */
3454 xmlparser_new, /* tp_new */
3455 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456};
3457
3458#endif
3459
3460/* ==================================================================== */
3461/* python module interface */
3462
3463static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 {NULL, NULL}
3466};
3467
Martin v. Löwis1a214512008-06-11 05:26:20 +00003468
3469static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003470 PyModuleDef_HEAD_INIT,
3471 "_elementtree",
3472 NULL,
3473 -1,
3474 _functions,
3475 NULL,
3476 NULL,
3477 NULL,
3478 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003479};
3480
Neal Norwitzf6657e62006-12-28 04:47:50 +00003481PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003482PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003483{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003484 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003486 /* Initialize object types */
3487 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003488 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003489 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003490 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003492 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003493 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494#endif
3495
Martin v. Löwis1a214512008-06-11 05:26:20 +00003496 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003497 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003498 return NULL;
3499
Eli Bendersky828efde2012-04-05 05:40:58 +03003500 if (!(temp = PyImport_ImportModule("copy")))
3501 return NULL;
3502 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3503 Py_XDECREF(temp);
3504
3505 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3506 return NULL;
3507
Eli Bendersky20d41742012-06-01 09:48:37 +03003508 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003509 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3510 if (expat_capi) {
3511 /* check that it's usable */
3512 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3513 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3514 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3515 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003516 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003517 PyErr_SetString(PyExc_ImportError,
3518 "pyexpat version is incompatible");
3519 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003520 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003521 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003522 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003523 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003524
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003526 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003527 );
3528 Py_INCREF(elementtree_parseerror_obj);
3529 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3530
Eli Bendersky092af1f2012-03-04 07:14:03 +02003531 Py_INCREF((PyObject *)&Element_Type);
3532 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3533
Eli Bendersky58d548d2012-05-29 15:45:16 +03003534 Py_INCREF((PyObject *)&TreeBuilder_Type);
3535 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3536
Eli Bendersky52467b12012-06-01 07:13:08 +03003537#if defined(USE_EXPAT)
3538 Py_INCREF((PyObject *)&XMLParser_Type);
3539 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3540#endif
3541
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003542 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003543}