blob: 9e65f423c9464b69c936158c809ea9bec1a6cc82 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
65/* For best performance, choose a value so that 80-90% of all nodes
66 have no more than the given number of children. Set this to zero
67 to minimize the size of the element structure itself (this only
68 helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
77#if 0
78static int memory = 0;
79#define ALLOC(size, comment)\
80do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
81#define RELEASE(size, comment)\
82do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
83#else
84#define ALLOC(size, comment)
85#define RELEASE(size, comment)
86#endif
87
88/* compiler tweaks */
89#if defined(_MSC_VER)
90#define LOCAL(type) static __inline type __fastcall
91#else
92#define LOCAL(type) static type
93#endif
94
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000095/* macros used to store 'join' flags in string object pointers. note
96 that all use of text and tail as object pointers must be wrapped in
97 JOIN_OBJ. see comments in the ElementObject definition for more
98 info. */
99#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
100#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +0200101#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817LOCAL(int)
818checkpath(PyObject* tag)
819{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000820 Py_ssize_t i;
821 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822
823 /* check if a tag contains an xpath character */
824
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000825#define PATHCHAR(ch) \
826 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200829 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
830 void *data = PyUnicode_DATA(tag);
831 unsigned int kind = PyUnicode_KIND(tag);
832 for (i = 0; i < len; i++) {
833 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
834 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200836 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200838 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 return 1;
840 }
841 return 0;
842 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 if (PyBytes_Check(tag)) {
844 char *p = PyBytes_AS_STRING(tag);
845 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 if (p[i] == '{')
847 check = 0;
848 else if (p[i] == '}')
849 check = 1;
850 else if (check && PATHCHAR(p[i]))
851 return 1;
852 }
853 return 0;
854 }
855
856 return 1; /* unknown type; might be path expression */
857}
858
859static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860element_extend(ElementObject* self, PyObject* args)
861{
862 PyObject* seq;
863 Py_ssize_t i, seqlen = 0;
864
865 PyObject* seq_in;
866 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
867 return NULL;
868
869 seq = PySequence_Fast(seq_in, "");
870 if (!seq) {
871 PyErr_Format(
872 PyExc_TypeError,
873 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
874 );
875 return NULL;
876 }
877
878 seqlen = PySequence_Size(seq);
879 for (i = 0; i < seqlen; i++) {
880 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200881 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
882 Py_DECREF(seq);
883 PyErr_Format(
884 PyExc_TypeError,
885 "expected an Element, not \"%.200s\"",
886 Py_TYPE(element)->tp_name);
887 return NULL;
888 }
889
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 if (element_add_subelement(self, element) < 0) {
891 Py_DECREF(seq);
892 return NULL;
893 }
894 }
895
896 Py_DECREF(seq);
897
898 Py_RETURN_NONE;
899}
900
901static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300902element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903{
904 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000906 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300907 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200908
Eli Bendersky737b1732012-05-29 06:02:56 +0300909 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
910 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000911 return NULL;
912
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200913 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200914 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200915 return _PyObject_CallMethodId(
916 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200918 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000919
920 if (!self->extra)
921 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100922
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000923 for (i = 0; i < self->extra->length; i++) {
924 PyObject* item = self->extra->children[i];
925 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000926 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 Py_INCREF(item);
928 return item;
929 }
930 }
931
932 Py_RETURN_NONE;
933}
934
935static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300936element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937{
938 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 PyObject* tag;
940 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000941 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200942 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300943 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200944
Eli Bendersky737b1732012-05-29 06:02:56 +0300945 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
946 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 return NULL;
948
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000949 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200950 return _PyObject_CallMethodId(
951 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952 );
953
954 if (!self->extra) {
955 Py_INCREF(default_value);
956 return default_value;
957 }
958
959 for (i = 0; i < self->extra->length; i++) {
960 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000961 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
962
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 PyObject* text = element_get_text(item);
964 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000965 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000966 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 return text;
968 }
969 }
970
971 Py_INCREF(default_value);
972 return default_value;
973}
974
975static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300976element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977{
978 int i;
979 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000981 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300982 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983
Eli Bendersky737b1732012-05-29 06:02:56 +0300984 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
985 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986 return NULL;
987
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200988 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200989 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200990 return _PyObject_CallMethodId(
991 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200993 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994
995 out = PyList_New(0);
996 if (!out)
997 return NULL;
998
999 if (!self->extra)
1000 return out;
1001
1002 for (i = 0; i < self->extra->length; i++) {
1003 PyObject* item = self->extra->children[i];
1004 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001005 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 if (PyList_Append(out, item) < 0) {
1007 Py_DECREF(out);
1008 return NULL;
1009 }
1010 }
1011 }
1012
1013 return out;
1014}
1015
1016static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001017element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001018{
1019 PyObject* tag;
1020 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001021 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001022 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023
Eli Bendersky737b1732012-05-29 06:02:56 +03001024 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1025 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001026 return NULL;
1027
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001028 return _PyObject_CallMethodId(
1029 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001030 );
1031}
1032
1033static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034element_get(ElementObject* self, PyObject* args)
1035{
1036 PyObject* value;
1037
1038 PyObject* key;
1039 PyObject* default_value = Py_None;
1040 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1041 return NULL;
1042
1043 if (!self->extra || self->extra->attrib == Py_None)
1044 value = default_value;
1045 else {
1046 value = PyDict_GetItem(self->extra->attrib, key);
1047 if (!value)
1048 value = default_value;
1049 }
1050
1051 Py_INCREF(value);
1052 return value;
1053}
1054
1055static PyObject*
1056element_getchildren(ElementObject* self, PyObject* args)
1057{
1058 int i;
1059 PyObject* list;
1060
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001061 /* FIXME: report as deprecated? */
1062
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001063 if (!PyArg_ParseTuple(args, ":getchildren"))
1064 return NULL;
1065
1066 if (!self->extra)
1067 return PyList_New(0);
1068
1069 list = PyList_New(self->extra->length);
1070 if (!list)
1071 return NULL;
1072
1073 for (i = 0; i < self->extra->length; i++) {
1074 PyObject* item = self->extra->children[i];
1075 Py_INCREF(item);
1076 PyList_SET_ITEM(list, i, item);
1077 }
1078
1079 return list;
1080}
1081
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001082
Eli Bendersky64d11e62012-06-15 07:42:50 +03001083static PyObject *
1084create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1085
1086
1087static PyObject *
1088element_iter(ElementObject *self, PyObject *args)
1089{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001091 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return NULL;
1093
Eli Bendersky64d11e62012-06-15 07:42:50 +03001094 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095}
1096
1097
1098static PyObject*
1099element_itertext(ElementObject* self, PyObject* args)
1100{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001101 if (!PyArg_ParseTuple(args, ":itertext"))
1102 return NULL;
1103
Eli Bendersky64d11e62012-06-15 07:42:50 +03001104 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105}
1106
Eli Bendersky64d11e62012-06-15 07:42:50 +03001107
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001109element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001110{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001111 ElementObject* self = (ElementObject*) self_;
1112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113 if (!self->extra || index < 0 || index >= self->extra->length) {
1114 PyErr_SetString(
1115 PyExc_IndexError,
1116 "child index out of range"
1117 );
1118 return NULL;
1119 }
1120
1121 Py_INCREF(self->extra->children[index]);
1122 return self->extra->children[index];
1123}
1124
1125static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126element_insert(ElementObject* self, PyObject* args)
1127{
1128 int i;
1129
1130 int index;
1131 PyObject* element;
1132 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1133 &Element_Type, &element))
1134 return NULL;
1135
1136 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001137 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139 if (index < 0) {
1140 index += self->extra->length;
1141 if (index < 0)
1142 index = 0;
1143 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144 if (index > self->extra->length)
1145 index = self->extra->length;
1146
1147 if (element_resize(self, 1) < 0)
1148 return NULL;
1149
1150 for (i = self->extra->length; i > index; i--)
1151 self->extra->children[i] = self->extra->children[i-1];
1152
1153 Py_INCREF(element);
1154 self->extra->children[index] = element;
1155
1156 self->extra->length++;
1157
1158 Py_RETURN_NONE;
1159}
1160
1161static PyObject*
1162element_items(ElementObject* self, PyObject* args)
1163{
1164 if (!PyArg_ParseTuple(args, ":items"))
1165 return NULL;
1166
1167 if (!self->extra || self->extra->attrib == Py_None)
1168 return PyList_New(0);
1169
1170 return PyDict_Items(self->extra->attrib);
1171}
1172
1173static PyObject*
1174element_keys(ElementObject* self, PyObject* args)
1175{
1176 if (!PyArg_ParseTuple(args, ":keys"))
1177 return NULL;
1178
1179 if (!self->extra || self->extra->attrib == Py_None)
1180 return PyList_New(0);
1181
1182 return PyDict_Keys(self->extra->attrib);
1183}
1184
Martin v. Löwis18e16552006-02-15 17:27:45 +00001185static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186element_length(ElementObject* self)
1187{
1188 if (!self->extra)
1189 return 0;
1190
1191 return self->extra->length;
1192}
1193
1194static PyObject*
1195element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1196{
1197 PyObject* elem;
1198
1199 PyObject* tag;
1200 PyObject* attrib;
1201 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1202 return NULL;
1203
1204 attrib = PyDict_Copy(attrib);
1205 if (!attrib)
1206 return NULL;
1207
Eli Bendersky092af1f2012-03-04 07:14:03 +02001208 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001209
1210 Py_DECREF(attrib);
1211
1212 return elem;
1213}
1214
1215static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216element_remove(ElementObject* self, PyObject* args)
1217{
1218 int i;
1219
1220 PyObject* element;
1221 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1222 return NULL;
1223
1224 if (!self->extra) {
1225 /* element has no children, so raise exception */
1226 PyErr_SetString(
1227 PyExc_ValueError,
1228 "list.remove(x): x not in list"
1229 );
1230 return NULL;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
1234 if (self->extra->children[i] == element)
1235 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001236 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 break;
1238 }
1239
1240 if (i == self->extra->length) {
1241 /* element is not in children, so raise exception */
1242 PyErr_SetString(
1243 PyExc_ValueError,
1244 "list.remove(x): x not in list"
1245 );
1246 return NULL;
1247 }
1248
1249 Py_DECREF(self->extra->children[i]);
1250
1251 self->extra->length--;
1252
1253 for (; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1255
1256 Py_RETURN_NONE;
1257}
1258
1259static PyObject*
1260element_repr(ElementObject* self)
1261{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001262 if (self->tag)
1263 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1264 else
1265 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001266}
1267
1268static PyObject*
1269element_set(ElementObject* self, PyObject* args)
1270{
1271 PyObject* attrib;
1272
1273 PyObject* key;
1274 PyObject* value;
1275 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1276 return NULL;
1277
1278 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001279 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280
1281 attrib = element_get_attrib(self);
1282 if (!attrib)
1283 return NULL;
1284
1285 if (PyDict_SetItem(attrib, key, value) < 0)
1286 return NULL;
1287
1288 Py_RETURN_NONE;
1289}
1290
1291static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001292element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001294 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295 int i;
1296 PyObject* old;
1297
1298 if (!self->extra || index < 0 || index >= self->extra->length) {
1299 PyErr_SetString(
1300 PyExc_IndexError,
1301 "child assignment index out of range");
1302 return -1;
1303 }
1304
1305 old = self->extra->children[index];
1306
1307 if (item) {
1308 Py_INCREF(item);
1309 self->extra->children[index] = item;
1310 } else {
1311 self->extra->length--;
1312 for (i = index; i < self->extra->length; i++)
1313 self->extra->children[i] = self->extra->children[i+1];
1314 }
1315
1316 Py_DECREF(old);
1317
1318 return 0;
1319}
1320
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321static PyObject*
1322element_subscr(PyObject* self_, PyObject* item)
1323{
1324 ElementObject* self = (ElementObject*) self_;
1325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326 if (PyIndex_Check(item)) {
1327 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328
1329 if (i == -1 && PyErr_Occurred()) {
1330 return NULL;
1331 }
1332 if (i < 0 && self->extra)
1333 i += self->extra->length;
1334 return element_getitem(self_, i);
1335 }
1336 else if (PySlice_Check(item)) {
1337 Py_ssize_t start, stop, step, slicelen, cur, i;
1338 PyObject* list;
1339
1340 if (!self->extra)
1341 return PyList_New(0);
1342
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001343 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001344 self->extra->length,
1345 &start, &stop, &step, &slicelen) < 0) {
1346 return NULL;
1347 }
1348
1349 if (slicelen <= 0)
1350 return PyList_New(0);
1351 else {
1352 list = PyList_New(slicelen);
1353 if (!list)
1354 return NULL;
1355
1356 for (cur = start, i = 0; i < slicelen;
1357 cur += step, i++) {
1358 PyObject* item = self->extra->children[cur];
1359 Py_INCREF(item);
1360 PyList_SET_ITEM(list, i, item);
1361 }
1362
1363 return list;
1364 }
1365 }
1366 else {
1367 PyErr_SetString(PyExc_TypeError,
1368 "element indices must be integers");
1369 return NULL;
1370 }
1371}
1372
1373static int
1374element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1375{
1376 ElementObject* self = (ElementObject*) self_;
1377
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001378 if (PyIndex_Check(item)) {
1379 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001380
1381 if (i == -1 && PyErr_Occurred()) {
1382 return -1;
1383 }
1384 if (i < 0 && self->extra)
1385 i += self->extra->length;
1386 return element_setitem(self_, i, value);
1387 }
1388 else if (PySlice_Check(item)) {
1389 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1390
1391 PyObject* recycle = NULL;
1392 PyObject* seq = NULL;
1393
1394 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001395 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001397 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398 self->extra->length,
1399 &start, &stop, &step, &slicelen) < 0) {
1400 return -1;
1401 }
1402
Eli Bendersky865756a2012-03-09 13:38:15 +02001403 if (value == NULL) {
1404 /* Delete slice */
1405 size_t cur;
1406 Py_ssize_t i;
1407
1408 if (slicelen <= 0)
1409 return 0;
1410
1411 /* Since we're deleting, the direction of the range doesn't matter,
1412 * so for simplicity make it always ascending.
1413 */
1414 if (step < 0) {
1415 stop = start + 1;
1416 start = stop + step * (slicelen - 1) - 1;
1417 step = -step;
1418 }
1419
1420 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1421
1422 /* recycle is a list that will contain all the children
1423 * scheduled for removal.
1424 */
1425 if (!(recycle = PyList_New(slicelen))) {
1426 PyErr_NoMemory();
1427 return -1;
1428 }
1429
1430 /* This loop walks over all the children that have to be deleted,
1431 * with cur pointing at them. num_moved is the amount of children
1432 * until the next deleted child that have to be "shifted down" to
1433 * occupy the deleted's places.
1434 * Note that in the ith iteration, shifting is done i+i places down
1435 * because i children were already removed.
1436 */
1437 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1438 /* Compute how many children have to be moved, clipping at the
1439 * list end.
1440 */
1441 Py_ssize_t num_moved = step - 1;
1442 if (cur + step >= (size_t)self->extra->length) {
1443 num_moved = self->extra->length - cur - 1;
1444 }
1445
1446 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1447
1448 memmove(
1449 self->extra->children + cur - i,
1450 self->extra->children + cur + 1,
1451 num_moved * sizeof(PyObject *));
1452 }
1453
1454 /* Leftover "tail" after the last removed child */
1455 cur = start + (size_t)slicelen * step;
1456 if (cur < (size_t)self->extra->length) {
1457 memmove(
1458 self->extra->children + cur - slicelen,
1459 self->extra->children + cur,
1460 (self->extra->length - cur) * sizeof(PyObject *));
1461 }
1462
1463 self->extra->length -= slicelen;
1464
1465 /* Discard the recycle list with all the deleted sub-elements */
1466 Py_XDECREF(recycle);
1467 return 0;
1468 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001469 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001470 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001471 seq = PySequence_Fast(value, "");
1472 if (!seq) {
1473 PyErr_Format(
1474 PyExc_TypeError,
1475 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1476 );
1477 return -1;
1478 }
1479 newlen = PySequence_Size(seq);
1480 }
1481
1482 if (step != 1 && newlen != slicelen)
1483 {
1484 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001485 "attempt to assign sequence of size %zd "
1486 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001487 newlen, slicelen
1488 );
1489 return -1;
1490 }
1491
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492 /* Resize before creating the recycle bin, to prevent refleaks. */
1493 if (newlen > slicelen) {
1494 if (element_resize(self, newlen - slicelen) < 0) {
1495 if (seq) {
1496 Py_DECREF(seq);
1497 }
1498 return -1;
1499 }
1500 }
1501
1502 if (slicelen > 0) {
1503 /* to avoid recursive calls to this method (via decref), move
1504 old items to the recycle bin here, and get rid of them when
1505 we're done modifying the element */
1506 recycle = PyList_New(slicelen);
1507 if (!recycle) {
1508 if (seq) {
1509 Py_DECREF(seq);
1510 }
1511 return -1;
1512 }
1513 for (cur = start, i = 0; i < slicelen;
1514 cur += step, i++)
1515 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1516 }
1517
1518 if (newlen < slicelen) {
1519 /* delete slice */
1520 for (i = stop; i < self->extra->length; i++)
1521 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1522 } else if (newlen > slicelen) {
1523 /* insert slice */
1524 for (i = self->extra->length-1; i >= stop; i--)
1525 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1526 }
1527
1528 /* replace the slice */
1529 for (cur = start, i = 0; i < newlen;
1530 cur += step, i++) {
1531 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1532 Py_INCREF(element);
1533 self->extra->children[cur] = element;
1534 }
1535
1536 self->extra->length += newlen - slicelen;
1537
1538 if (seq) {
1539 Py_DECREF(seq);
1540 }
1541
1542 /* discard the recycle bin, and everything in it */
1543 Py_XDECREF(recycle);
1544
1545 return 0;
1546 }
1547 else {
1548 PyErr_SetString(PyExc_TypeError,
1549 "element indices must be integers");
1550 return -1;
1551 }
1552}
1553
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554static PyMethodDef element_methods[] = {
1555
Eli Bendersky0192ba32012-03-30 16:38:33 +03001556 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557
1558 {"get", (PyCFunction) element_get, METH_VARARGS},
1559 {"set", (PyCFunction) element_set, METH_VARARGS},
1560
Eli Bendersky737b1732012-05-29 06:02:56 +03001561 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1562 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1563 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
1565 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001566 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1568 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1569
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001570 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1571 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001572 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573
1574 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1576
1577 {"items", (PyCFunction) element_items, METH_VARARGS},
1578 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1579
1580 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1581
1582 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1583 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001584 {"__sizeof__", element_sizeof, METH_NOARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 {NULL, NULL}
1587};
1588
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001590element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591{
1592 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001593 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001595 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001596 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001597
Alexander Belopolskye239d232010-12-08 23:31:48 +00001598 if (name == NULL)
1599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001601 /* handle common attributes first */
1602 if (strcmp(name, "tag") == 0) {
1603 res = self->tag;
1604 Py_INCREF(res);
1605 return res;
1606 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001608 Py_INCREF(res);
1609 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 }
1611
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001612 /* methods */
1613 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1614 if (res)
1615 return res;
1616
1617 /* less common attributes */
1618 if (strcmp(name, "tail") == 0) {
1619 PyErr_Clear();
1620 res = element_get_tail(self);
1621 } else if (strcmp(name, "attrib") == 0) {
1622 PyErr_Clear();
1623 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001624 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001625 res = element_get_attrib(self);
1626 }
1627
1628 if (!res)
1629 return NULL;
1630
1631 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 return res;
1633}
1634
Eli Benderskyb20df952012-05-20 06:33:29 +03001635static PyObject*
1636element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637{
Eli Benderskyb20df952012-05-20 06:33:29 +03001638 char *name = "";
1639 if (PyUnicode_Check(nameobj))
1640 name = _PyUnicode_AsString(nameobj);
1641
1642 if (name == NULL)
1643 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644
1645 if (strcmp(name, "tag") == 0) {
1646 Py_DECREF(self->tag);
1647 self->tag = value;
1648 Py_INCREF(self->tag);
1649 } else if (strcmp(name, "text") == 0) {
1650 Py_DECREF(JOIN_OBJ(self->text));
1651 self->text = value;
1652 Py_INCREF(self->text);
1653 } else if (strcmp(name, "tail") == 0) {
1654 Py_DECREF(JOIN_OBJ(self->tail));
1655 self->tail = value;
1656 Py_INCREF(self->tail);
1657 } else if (strcmp(name, "attrib") == 0) {
1658 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001659 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001660 Py_DECREF(self->extra->attrib);
1661 self->extra->attrib = value;
1662 Py_INCREF(self->extra->attrib);
1663 } else {
1664 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001665 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666 }
1667
Eli Benderskyb20df952012-05-20 06:33:29 +03001668 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669}
1670
1671static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 0, /* sq_concat */
1674 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001675 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001676 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001677 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001678 0,
1679};
1680
1681static PyMappingMethods element_as_mapping = {
1682 (lenfunc) element_length,
1683 (binaryfunc) element_subscr,
1684 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685};
1686
Neal Norwitz227b5332006-03-22 09:28:35 +00001687static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001688 PyVarObject_HEAD_INIT(NULL, 0)
1689 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001691 (destructor)element_dealloc, /* tp_dealloc */
1692 0, /* tp_print */
1693 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001694 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001695 0, /* tp_reserved */
1696 (reprfunc)element_repr, /* tp_repr */
1697 0, /* tp_as_number */
1698 &element_as_sequence, /* tp_as_sequence */
1699 &element_as_mapping, /* tp_as_mapping */
1700 0, /* tp_hash */
1701 0, /* tp_call */
1702 0, /* tp_str */
1703 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001704 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001705 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001706 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1707 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001708 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001709 (traverseproc)element_gc_traverse, /* tp_traverse */
1710 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001711 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001712 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001713 0, /* tp_iter */
1714 0, /* tp_iternext */
1715 element_methods, /* tp_methods */
1716 0, /* tp_members */
1717 0, /* tp_getset */
1718 0, /* tp_base */
1719 0, /* tp_dict */
1720 0, /* tp_descr_get */
1721 0, /* tp_descr_set */
1722 0, /* tp_dictoffset */
1723 (initproc)element_init, /* tp_init */
1724 PyType_GenericAlloc, /* tp_alloc */
1725 element_new, /* tp_new */
1726 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727};
1728
Eli Bendersky64d11e62012-06-15 07:42:50 +03001729/******************************* Element iterator ****************************/
1730
1731/* ElementIterObject represents the iteration state over an XML element in
1732 * pre-order traversal. To keep track of which sub-element should be returned
1733 * next, a stack of parents is maintained. This is a standard stack-based
1734 * iterative pre-order traversal of a tree.
1735 * The stack is managed using a single-linked list starting at parent_stack.
1736 * Each stack node contains the saved parent to which we should return after
1737 * the current one is exhausted, and the next child to examine in that parent.
1738 */
1739typedef struct ParentLocator_t {
1740 ElementObject *parent;
1741 Py_ssize_t child_index;
1742 struct ParentLocator_t *next;
1743} ParentLocator;
1744
1745typedef struct {
1746 PyObject_HEAD
1747 ParentLocator *parent_stack;
1748 ElementObject *root_element;
1749 PyObject *sought_tag;
1750 int root_done;
1751 int gettext;
1752} ElementIterObject;
1753
1754
1755static void
1756elementiter_dealloc(ElementIterObject *it)
1757{
1758 ParentLocator *p = it->parent_stack;
1759 while (p) {
1760 ParentLocator *temp = p;
1761 Py_XDECREF(p->parent);
1762 p = p->next;
1763 PyObject_Free(temp);
1764 }
1765
1766 Py_XDECREF(it->sought_tag);
1767 Py_XDECREF(it->root_element);
1768
1769 PyObject_GC_UnTrack(it);
1770 PyObject_GC_Del(it);
1771}
1772
1773static int
1774elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1775{
1776 ParentLocator *p = it->parent_stack;
1777 while (p) {
1778 Py_VISIT(p->parent);
1779 p = p->next;
1780 }
1781
1782 Py_VISIT(it->root_element);
1783 Py_VISIT(it->sought_tag);
1784 return 0;
1785}
1786
1787/* Helper function for elementiter_next. Add a new parent to the parent stack.
1788 */
1789static ParentLocator *
1790parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1791{
1792 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1793 if (new_node) {
1794 new_node->parent = parent;
1795 Py_INCREF(parent);
1796 new_node->child_index = 0;
1797 new_node->next = stack;
1798 }
1799 return new_node;
1800}
1801
1802static PyObject *
1803elementiter_next(ElementIterObject *it)
1804{
1805 /* Sub-element iterator.
1806 *
1807 * A short note on gettext: this function serves both the iter() and
1808 * itertext() methods to avoid code duplication. However, there are a few
1809 * small differences in the way these iterations work. Namely:
1810 * - itertext() only yields text from nodes that have it, and continues
1811 * iterating when a node doesn't have text (so it doesn't return any
1812 * node like iter())
1813 * - itertext() also has to handle tail, after finishing with all the
1814 * children of a node.
1815 */
Eli Bendersky113da642012-06-15 07:52:49 +03001816 ElementObject *cur_parent;
1817 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001818
1819 while (1) {
1820 /* Handle the case reached in the beginning and end of iteration, where
1821 * the parent stack is empty. The root_done flag gives us indication
1822 * whether we've just started iterating (so root_done is 0), in which
1823 * case the root is returned. If root_done is 1 and we're here, the
1824 * iterator is exhausted.
1825 */
1826 if (!it->parent_stack->parent) {
1827 if (it->root_done) {
1828 PyErr_SetNone(PyExc_StopIteration);
1829 return NULL;
1830 } else {
1831 it->parent_stack = parent_stack_push_new(it->parent_stack,
1832 it->root_element);
1833 if (!it->parent_stack) {
1834 PyErr_NoMemory();
1835 return NULL;
1836 }
1837
1838 it->root_done = 1;
1839 if (it->sought_tag == Py_None ||
1840 PyObject_RichCompareBool(it->root_element->tag,
1841 it->sought_tag, Py_EQ) == 1) {
1842 if (it->gettext) {
1843 PyObject *text = JOIN_OBJ(it->root_element->text);
1844 if (PyObject_IsTrue(text)) {
1845 Py_INCREF(text);
1846 return text;
1847 }
1848 } else {
1849 Py_INCREF(it->root_element);
1850 return (PyObject *)it->root_element;
1851 }
1852 }
1853 }
1854 }
1855
1856 /* See if there are children left to traverse in the current parent. If
1857 * yes, visit the next child. If not, pop the stack and try again.
1858 */
Eli Bendersky113da642012-06-15 07:52:49 +03001859 cur_parent = it->parent_stack->parent;
1860 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001861 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1862 ElementObject *child = (ElementObject *)
1863 cur_parent->extra->children[child_index];
1864 it->parent_stack->child_index++;
1865 it->parent_stack = parent_stack_push_new(it->parent_stack,
1866 child);
1867 if (!it->parent_stack) {
1868 PyErr_NoMemory();
1869 return NULL;
1870 }
1871
1872 if (it->gettext) {
1873 PyObject *text = JOIN_OBJ(child->text);
1874 if (PyObject_IsTrue(text)) {
1875 Py_INCREF(text);
1876 return text;
1877 }
1878 } else if (it->sought_tag == Py_None ||
1879 PyObject_RichCompareBool(child->tag,
1880 it->sought_tag, Py_EQ) == 1) {
1881 Py_INCREF(child);
1882 return (PyObject *)child;
1883 }
1884 else
1885 continue;
1886 }
1887 else {
1888 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1889 ParentLocator *next = it->parent_stack->next;
1890 Py_XDECREF(it->parent_stack->parent);
1891 PyObject_Free(it->parent_stack);
1892 it->parent_stack = next;
1893
1894 /* Note that extra condition on it->parent_stack->parent here;
1895 * this is because itertext() is supposed to only return *inner*
1896 * text, not text following the element it began iteration with.
1897 */
1898 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1899 Py_INCREF(tail);
1900 return tail;
1901 }
1902 }
1903 }
1904
1905 return NULL;
1906}
1907
1908
1909static PyTypeObject ElementIter_Type = {
1910 PyVarObject_HEAD_INIT(NULL, 0)
1911 "_elementtree._element_iterator", /* tp_name */
1912 sizeof(ElementIterObject), /* tp_basicsize */
1913 0, /* tp_itemsize */
1914 /* methods */
1915 (destructor)elementiter_dealloc, /* tp_dealloc */
1916 0, /* tp_print */
1917 0, /* tp_getattr */
1918 0, /* tp_setattr */
1919 0, /* tp_reserved */
1920 0, /* tp_repr */
1921 0, /* tp_as_number */
1922 0, /* tp_as_sequence */
1923 0, /* tp_as_mapping */
1924 0, /* tp_hash */
1925 0, /* tp_call */
1926 0, /* tp_str */
1927 0, /* tp_getattro */
1928 0, /* tp_setattro */
1929 0, /* tp_as_buffer */
1930 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1931 0, /* tp_doc */
1932 (traverseproc)elementiter_traverse, /* tp_traverse */
1933 0, /* tp_clear */
1934 0, /* tp_richcompare */
1935 0, /* tp_weaklistoffset */
1936 PyObject_SelfIter, /* tp_iter */
1937 (iternextfunc)elementiter_next, /* tp_iternext */
1938 0, /* tp_methods */
1939 0, /* tp_members */
1940 0, /* tp_getset */
1941 0, /* tp_base */
1942 0, /* tp_dict */
1943 0, /* tp_descr_get */
1944 0, /* tp_descr_set */
1945 0, /* tp_dictoffset */
1946 0, /* tp_init */
1947 0, /* tp_alloc */
1948 0, /* tp_new */
1949};
1950
1951
1952static PyObject *
1953create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1954{
1955 ElementIterObject *it;
1956 PyObject *star = NULL;
1957
1958 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
1959 if (!it)
1960 return NULL;
1961 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
1962 PyObject_GC_Del(it);
1963 return NULL;
1964 }
1965
1966 it->parent_stack->parent = NULL;
1967 it->parent_stack->child_index = 0;
1968 it->parent_stack->next = NULL;
1969
1970 if (PyUnicode_Check(tag))
1971 star = PyUnicode_FromString("*");
1972 else if (PyBytes_Check(tag))
1973 star = PyBytes_FromString("*");
1974
1975 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
1976 tag = Py_None;
1977
1978 Py_XDECREF(star);
1979 it->sought_tag = tag;
1980 it->root_done = 0;
1981 it->gettext = gettext;
1982 it->root_element = self;
1983
1984 Py_INCREF(self);
1985 Py_INCREF(tag);
1986
1987 PyObject_GC_Track(it);
1988 return (PyObject *)it;
1989}
1990
1991
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001992/* ==================================================================== */
1993/* the tree builder type */
1994
1995typedef struct {
1996 PyObject_HEAD
1997
Eli Bendersky58d548d2012-05-29 15:45:16 +03001998 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001999
Eli Bendersky58d548d2012-05-29 15:45:16 +03002000 ElementObject *this; /* current node */
2001 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002002
Eli Bendersky58d548d2012-05-29 15:45:16 +03002003 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002004
Eli Bendersky58d548d2012-05-29 15:45:16 +03002005 PyObject *stack; /* element stack */
2006 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002007
Eli Bendersky48d358b2012-05-30 17:57:50 +03002008 PyObject *element_factory;
2009
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002010 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002011 PyObject *events; /* list of events, or NULL if not collecting */
2012 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2013 PyObject *end_event_obj;
2014 PyObject *start_ns_event_obj;
2015 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016} TreeBuilderObject;
2017
Neal Norwitz227b5332006-03-22 09:28:35 +00002018static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019
Christian Heimes90aa7642007-12-19 02:45:37 +00002020#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002021
2022/* -------------------------------------------------------------------- */
2023/* constructor and destructor */
2024
Eli Bendersky58d548d2012-05-29 15:45:16 +03002025static PyObject *
2026treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002027{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002028 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2029 if (t != NULL) {
2030 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002031
Eli Bendersky58d548d2012-05-29 15:45:16 +03002032 Py_INCREF(Py_None);
2033 t->this = (ElementObject *)Py_None;
2034 Py_INCREF(Py_None);
2035 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036
Eli Bendersky58d548d2012-05-29 15:45:16 +03002037 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002038 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002039 t->stack = PyList_New(20);
2040 if (!t->stack) {
2041 Py_DECREF(t->this);
2042 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002043 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002044 return NULL;
2045 }
2046 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002047
Eli Bendersky58d548d2012-05-29 15:45:16 +03002048 t->events = NULL;
2049 t->start_event_obj = t->end_event_obj = NULL;
2050 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2051 }
2052 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002053}
2054
Eli Bendersky58d548d2012-05-29 15:45:16 +03002055static int
2056treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002057{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002058 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002059 PyObject *element_factory = NULL;
2060 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002061 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002062
2063 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2064 &element_factory)) {
2065 return -1;
2066 }
2067
2068 if (element_factory) {
2069 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002070 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002071 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002072 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002073 }
2074
Eli Bendersky58d548d2012-05-29 15:45:16 +03002075 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002076}
2077
Eli Bendersky48d358b2012-05-30 17:57:50 +03002078static int
2079treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2080{
2081 Py_VISIT(self->root);
2082 Py_VISIT(self->this);
2083 Py_VISIT(self->last);
2084 Py_VISIT(self->data);
2085 Py_VISIT(self->stack);
2086 Py_VISIT(self->element_factory);
2087 return 0;
2088}
2089
2090static int
2091treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002092{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002093 Py_CLEAR(self->end_ns_event_obj);
2094 Py_CLEAR(self->start_ns_event_obj);
2095 Py_CLEAR(self->end_event_obj);
2096 Py_CLEAR(self->start_event_obj);
2097 Py_CLEAR(self->events);
2098 Py_CLEAR(self->stack);
2099 Py_CLEAR(self->data);
2100 Py_CLEAR(self->last);
2101 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002102 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002103 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002104 return 0;
2105}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002106
Eli Bendersky48d358b2012-05-30 17:57:50 +03002107static void
2108treebuilder_dealloc(TreeBuilderObject *self)
2109{
2110 PyObject_GC_UnTrack(self);
2111 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002112 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002113}
2114
2115/* -------------------------------------------------------------------- */
2116/* handlers */
2117
2118LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002119treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2120 PyObject* attrib)
2121{
2122 PyObject* node;
2123 PyObject* this;
2124
2125 if (self->data) {
2126 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002127 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002128 self->last->text = JOIN_SET(
2129 self->data, PyList_CheckExact(self->data)
2130 );
2131 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002132 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002133 self->last->tail = JOIN_SET(
2134 self->data, PyList_CheckExact(self->data)
2135 );
2136 }
2137 self->data = NULL;
2138 }
2139
Eli Bendersky48d358b2012-05-30 17:57:50 +03002140 if (self->element_factory) {
2141 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2142 } else {
2143 node = create_new_element(tag, attrib);
2144 }
2145 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002146 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002147 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002148
2149 this = (PyObject*) self->this;
2150
2151 if (this != Py_None) {
2152 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002153 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002154 } else {
2155 if (self->root) {
2156 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002157 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002158 "multiple elements on top level"
2159 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002160 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002161 }
2162 Py_INCREF(node);
2163 self->root = node;
2164 }
2165
2166 if (self->index < PyList_GET_SIZE(self->stack)) {
2167 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002168 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002169 Py_INCREF(this);
2170 } else {
2171 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002172 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002173 }
2174 self->index++;
2175
2176 Py_DECREF(this);
2177 Py_INCREF(node);
2178 self->this = (ElementObject*) node;
2179
2180 Py_DECREF(self->last);
2181 Py_INCREF(node);
2182 self->last = (ElementObject*) node;
2183
2184 if (self->start_event_obj) {
2185 PyObject* res;
2186 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002187 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002188 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002189 PyList_Append(self->events, res);
2190 Py_DECREF(res);
2191 } else
2192 PyErr_Clear(); /* FIXME: propagate error */
2193 }
2194
2195 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002196
2197 error:
2198 Py_DECREF(node);
2199 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002200}
2201
2202LOCAL(PyObject*)
2203treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2204{
2205 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002206 if (self->last == (ElementObject*) Py_None) {
2207 /* ignore calls to data before the first call to start */
2208 Py_RETURN_NONE;
2209 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210 /* store the first item as is */
2211 Py_INCREF(data); self->data = data;
2212 } else {
2213 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002214 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2215 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002216 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002217 /* expat often generates single character data sections; handle
2218 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002219 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2220 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002222 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002223 } else if (PyList_CheckExact(self->data)) {
2224 if (PyList_Append(self->data, data) < 0)
2225 return NULL;
2226 } else {
2227 PyObject* list = PyList_New(2);
2228 if (!list)
2229 return NULL;
2230 PyList_SET_ITEM(list, 0, self->data);
2231 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2232 self->data = list;
2233 }
2234 }
2235
2236 Py_RETURN_NONE;
2237}
2238
2239LOCAL(PyObject*)
2240treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2241{
2242 PyObject* item;
2243
2244 if (self->data) {
2245 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002246 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002247 self->last->text = JOIN_SET(
2248 self->data, PyList_CheckExact(self->data)
2249 );
2250 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002251 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252 self->last->tail = JOIN_SET(
2253 self->data, PyList_CheckExact(self->data)
2254 );
2255 }
2256 self->data = NULL;
2257 }
2258
2259 if (self->index == 0) {
2260 PyErr_SetString(
2261 PyExc_IndexError,
2262 "pop from empty stack"
2263 );
2264 return NULL;
2265 }
2266
2267 self->index--;
2268
2269 item = PyList_GET_ITEM(self->stack, self->index);
2270 Py_INCREF(item);
2271
2272 Py_DECREF(self->last);
2273
2274 self->last = (ElementObject*) self->this;
2275 self->this = (ElementObject*) item;
2276
2277 if (self->end_event_obj) {
2278 PyObject* res;
2279 PyObject* action = self->end_event_obj;
2280 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002281 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283 PyList_Append(self->events, res);
2284 Py_DECREF(res);
2285 } else
2286 PyErr_Clear(); /* FIXME: propagate error */
2287 }
2288
2289 Py_INCREF(self->last);
2290 return (PyObject*) self->last;
2291}
2292
2293LOCAL(void)
2294treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002295 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296{
2297 PyObject* res;
2298 PyObject* action;
2299 PyObject* parcel;
2300
2301 if (!self->events)
2302 return;
2303
2304 if (start) {
2305 if (!self->start_ns_event_obj)
2306 return;
2307 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002308 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309 if (!parcel)
2310 return;
2311 Py_INCREF(action);
2312 } else {
2313 if (!self->end_ns_event_obj)
2314 return;
2315 action = self->end_ns_event_obj;
2316 Py_INCREF(action);
2317 parcel = Py_None;
2318 Py_INCREF(parcel);
2319 }
2320
2321 res = PyTuple_New(2);
2322
2323 if (res) {
2324 PyTuple_SET_ITEM(res, 0, action);
2325 PyTuple_SET_ITEM(res, 1, parcel);
2326 PyList_Append(self->events, res);
2327 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002328 }
2329 else {
2330 Py_DECREF(action);
2331 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334}
2335
2336/* -------------------------------------------------------------------- */
2337/* methods (in alphabetical order) */
2338
2339static PyObject*
2340treebuilder_data(TreeBuilderObject* self, PyObject* args)
2341{
2342 PyObject* data;
2343 if (!PyArg_ParseTuple(args, "O:data", &data))
2344 return NULL;
2345
2346 return treebuilder_handle_data(self, data);
2347}
2348
2349static PyObject*
2350treebuilder_end(TreeBuilderObject* self, PyObject* args)
2351{
2352 PyObject* tag;
2353 if (!PyArg_ParseTuple(args, "O:end", &tag))
2354 return NULL;
2355
2356 return treebuilder_handle_end(self, tag);
2357}
2358
2359LOCAL(PyObject*)
2360treebuilder_done(TreeBuilderObject* self)
2361{
2362 PyObject* res;
2363
2364 /* FIXME: check stack size? */
2365
2366 if (self->root)
2367 res = self->root;
2368 else
2369 res = Py_None;
2370
2371 Py_INCREF(res);
2372 return res;
2373}
2374
2375static PyObject*
2376treebuilder_close(TreeBuilderObject* self, PyObject* args)
2377{
2378 if (!PyArg_ParseTuple(args, ":close"))
2379 return NULL;
2380
2381 return treebuilder_done(self);
2382}
2383
2384static PyObject*
2385treebuilder_start(TreeBuilderObject* self, PyObject* args)
2386{
2387 PyObject* tag;
2388 PyObject* attrib = Py_None;
2389 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2390 return NULL;
2391
2392 return treebuilder_handle_start(self, tag, attrib);
2393}
2394
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395static PyMethodDef treebuilder_methods[] = {
2396 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2397 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2398 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2400 {NULL, NULL}
2401};
2402
Neal Norwitz227b5332006-03-22 09:28:35 +00002403static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002404 PyVarObject_HEAD_INIT(NULL, 0)
2405 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002407 (destructor)treebuilder_dealloc, /* tp_dealloc */
2408 0, /* tp_print */
2409 0, /* tp_getattr */
2410 0, /* tp_setattr */
2411 0, /* tp_reserved */
2412 0, /* tp_repr */
2413 0, /* tp_as_number */
2414 0, /* tp_as_sequence */
2415 0, /* tp_as_mapping */
2416 0, /* tp_hash */
2417 0, /* tp_call */
2418 0, /* tp_str */
2419 0, /* tp_getattro */
2420 0, /* tp_setattro */
2421 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002422 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2423 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002424 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002425 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2426 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002427 0, /* tp_richcompare */
2428 0, /* tp_weaklistoffset */
2429 0, /* tp_iter */
2430 0, /* tp_iternext */
2431 treebuilder_methods, /* tp_methods */
2432 0, /* tp_members */
2433 0, /* tp_getset */
2434 0, /* tp_base */
2435 0, /* tp_dict */
2436 0, /* tp_descr_get */
2437 0, /* tp_descr_set */
2438 0, /* tp_dictoffset */
2439 (initproc)treebuilder_init, /* tp_init */
2440 PyType_GenericAlloc, /* tp_alloc */
2441 treebuilder_new, /* tp_new */
2442 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443};
2444
2445/* ==================================================================== */
2446/* the expat interface */
2447
2448#if defined(USE_EXPAT)
2449
2450#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002451#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002452static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002453#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454
Eli Bendersky52467b12012-06-01 07:13:08 +03002455static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2456 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2457
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002458typedef struct {
2459 PyObject_HEAD
2460
2461 XML_Parser parser;
2462
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002463 PyObject *target;
2464 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002466 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002467
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002468 PyObject *handle_start;
2469 PyObject *handle_data;
2470 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002472 PyObject *handle_comment;
2473 PyObject *handle_pi;
2474 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002476 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002477
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478} XMLParserObject;
2479
Neal Norwitz227b5332006-03-22 09:28:35 +00002480static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002482#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2483
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484/* helpers */
2485
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486LOCAL(PyObject*)
2487makeuniversal(XMLParserObject* self, const char* string)
2488{
2489 /* convert a UTF-8 tag/attribute name from the expat parser
2490 to a universal name string */
2491
Antoine Pitrouc1948842012-10-01 23:40:37 +02002492 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002493 PyObject* key;
2494 PyObject* value;
2495
2496 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002497 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 if (!key)
2499 return NULL;
2500
2501 value = PyDict_GetItem(self->names, key);
2502
2503 if (value) {
2504 Py_INCREF(value);
2505 } else {
2506 /* new name. convert to universal name, and decode as
2507 necessary */
2508
2509 PyObject* tag;
2510 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002511 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512
2513 /* look for namespace separator */
2514 for (i = 0; i < size; i++)
2515 if (string[i] == '}')
2516 break;
2517 if (i != size) {
2518 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002519 tag = PyBytes_FromStringAndSize(NULL, size+1);
2520 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 p[0] = '{';
2522 memcpy(p+1, string, size);
2523 size++;
2524 } else {
2525 /* plain name; use key as tag */
2526 Py_INCREF(key);
2527 tag = key;
2528 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002529
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002531 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002532 value = PyUnicode_DecodeUTF8(p, size, "strict");
2533 Py_DECREF(tag);
2534 if (!value) {
2535 Py_DECREF(key);
2536 return NULL;
2537 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538
2539 /* add to names dictionary */
2540 if (PyDict_SetItem(self->names, key, value) < 0) {
2541 Py_DECREF(key);
2542 Py_DECREF(value);
2543 return NULL;
2544 }
2545 }
2546
2547 Py_DECREF(key);
2548 return value;
2549}
2550
Eli Bendersky5b77d812012-03-16 08:20:05 +02002551/* Set the ParseError exception with the given parameters.
2552 * If message is not NULL, it's used as the error string. Otherwise, the
2553 * message string is the default for the given error_code.
2554*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002555static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002556expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002557{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002558 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002559
Victor Stinner499dfcf2011-03-21 13:26:24 +01002560 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002561 message ? message : EXPAT(ErrorString)(error_code),
2562 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002563 if (errmsg == NULL)
2564 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002565
Victor Stinner499dfcf2011-03-21 13:26:24 +01002566 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2567 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002568 if (!error)
2569 return;
2570
Eli Bendersky5b77d812012-03-16 08:20:05 +02002571 /* Add code and position attributes */
2572 code = PyLong_FromLong((long)error_code);
2573 if (!code) {
2574 Py_DECREF(error);
2575 return;
2576 }
2577 if (PyObject_SetAttrString(error, "code", code) == -1) {
2578 Py_DECREF(error);
2579 Py_DECREF(code);
2580 return;
2581 }
2582 Py_DECREF(code);
2583
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002584 position = Py_BuildValue("(ii)", line, column);
2585 if (!position) {
2586 Py_DECREF(error);
2587 return;
2588 }
2589 if (PyObject_SetAttrString(error, "position", position) == -1) {
2590 Py_DECREF(error);
2591 Py_DECREF(position);
2592 return;
2593 }
2594 Py_DECREF(position);
2595
2596 PyErr_SetObject(elementtree_parseerror_obj, error);
2597 Py_DECREF(error);
2598}
2599
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600/* -------------------------------------------------------------------- */
2601/* handlers */
2602
2603static void
2604expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2605 int data_len)
2606{
2607 PyObject* key;
2608 PyObject* value;
2609 PyObject* res;
2610
2611 if (data_len < 2 || data_in[0] != '&')
2612 return;
2613
Neal Norwitz0269b912007-08-08 06:56:02 +00002614 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615 if (!key)
2616 return;
2617
2618 value = PyDict_GetItem(self->entity, key);
2619
2620 if (value) {
2621 if (TreeBuilder_CheckExact(self->target))
2622 res = treebuilder_handle_data(
2623 (TreeBuilderObject*) self->target, value
2624 );
2625 else if (self->handle_data)
2626 res = PyObject_CallFunction(self->handle_data, "O", value);
2627 else
2628 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002630 } else if (!PyErr_Occurred()) {
2631 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002632 char message[128] = "undefined entity ";
2633 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002634 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002635 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002637 EXPAT(GetErrorColumnNumber)(self->parser),
2638 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639 );
2640 }
2641
2642 Py_DECREF(key);
2643}
2644
2645static void
2646expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2647 const XML_Char **attrib_in)
2648{
2649 PyObject* res;
2650 PyObject* tag;
2651 PyObject* attrib;
2652 int ok;
2653
2654 /* tag name */
2655 tag = makeuniversal(self, tag_in);
2656 if (!tag)
2657 return; /* parser will look for errors */
2658
2659 /* attributes */
2660 if (attrib_in[0]) {
2661 attrib = PyDict_New();
2662 if (!attrib)
2663 return;
2664 while (attrib_in[0] && attrib_in[1]) {
2665 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002666 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667 if (!key || !value) {
2668 Py_XDECREF(value);
2669 Py_XDECREF(key);
2670 Py_DECREF(attrib);
2671 return;
2672 }
2673 ok = PyDict_SetItem(attrib, key, value);
2674 Py_DECREF(value);
2675 Py_DECREF(key);
2676 if (ok < 0) {
2677 Py_DECREF(attrib);
2678 return;
2679 }
2680 attrib_in += 2;
2681 }
2682 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002683 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002684 attrib = PyDict_New();
2685 if (!attrib)
2686 return;
2687 }
2688
2689 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 /* shortcut */
2691 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2692 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002693 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002694 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002696 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697 res = NULL;
2698
2699 Py_DECREF(tag);
2700 Py_DECREF(attrib);
2701
2702 Py_XDECREF(res);
2703}
2704
2705static void
2706expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2707 int data_len)
2708{
2709 PyObject* data;
2710 PyObject* res;
2711
Neal Norwitz0269b912007-08-08 06:56:02 +00002712 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002713 if (!data)
2714 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
2716 if (TreeBuilder_CheckExact(self->target))
2717 /* shortcut */
2718 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2719 else if (self->handle_data)
2720 res = PyObject_CallFunction(self->handle_data, "O", data);
2721 else
2722 res = NULL;
2723
2724 Py_DECREF(data);
2725
2726 Py_XDECREF(res);
2727}
2728
2729static void
2730expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2731{
2732 PyObject* tag;
2733 PyObject* res = NULL;
2734
2735 if (TreeBuilder_CheckExact(self->target))
2736 /* shortcut */
2737 /* the standard tree builder doesn't look at the end tag */
2738 res = treebuilder_handle_end(
2739 (TreeBuilderObject*) self->target, Py_None
2740 );
2741 else if (self->handle_end) {
2742 tag = makeuniversal(self, tag_in);
2743 if (tag) {
2744 res = PyObject_CallFunction(self->handle_end, "O", tag);
2745 Py_DECREF(tag);
2746 }
2747 }
2748
2749 Py_XDECREF(res);
2750}
2751
2752static void
2753expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2754 const XML_Char *uri)
2755{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002756 PyObject* sprefix = NULL;
2757 PyObject* suri = NULL;
2758
2759 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2760 if (!suri)
2761 return;
2762
2763 if (prefix)
2764 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2765 else
2766 sprefix = PyUnicode_FromString("");
2767 if (!sprefix) {
2768 Py_DECREF(suri);
2769 return;
2770 }
2771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002773 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002775
2776 Py_DECREF(sprefix);
2777 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778}
2779
2780static void
2781expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2782{
2783 treebuilder_handle_namespace(
2784 (TreeBuilderObject*) self->target, 0, NULL, NULL
2785 );
2786}
2787
2788static void
2789expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2790{
2791 PyObject* comment;
2792 PyObject* res;
2793
2794 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002795 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 if (comment) {
2797 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2798 Py_XDECREF(res);
2799 Py_DECREF(comment);
2800 }
2801 }
2802}
2803
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002804static void
2805expat_start_doctype_handler(XMLParserObject *self,
2806 const XML_Char *doctype_name,
2807 const XML_Char *sysid,
2808 const XML_Char *pubid,
2809 int has_internal_subset)
2810{
2811 PyObject *self_pyobj = (PyObject *)self;
2812 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2813 PyObject *parser_doctype = NULL;
2814 PyObject *res = NULL;
2815
2816 doctype_name_obj = makeuniversal(self, doctype_name);
2817 if (!doctype_name_obj)
2818 return;
2819
2820 if (sysid) {
2821 sysid_obj = makeuniversal(self, sysid);
2822 if (!sysid_obj) {
2823 Py_DECREF(doctype_name_obj);
2824 return;
2825 }
2826 } else {
2827 Py_INCREF(Py_None);
2828 sysid_obj = Py_None;
2829 }
2830
2831 if (pubid) {
2832 pubid_obj = makeuniversal(self, pubid);
2833 if (!pubid_obj) {
2834 Py_DECREF(doctype_name_obj);
2835 Py_DECREF(sysid_obj);
2836 return;
2837 }
2838 } else {
2839 Py_INCREF(Py_None);
2840 pubid_obj = Py_None;
2841 }
2842
2843 /* If the target has a handler for doctype, call it. */
2844 if (self->handle_doctype) {
2845 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2846 doctype_name_obj, pubid_obj, sysid_obj);
2847 Py_CLEAR(res);
2848 }
2849
2850 /* Now see if the parser itself has a doctype method. If yes and it's
2851 * a subclass, call it but warn about deprecation. If it's not a subclass
2852 * (i.e. vanilla XMLParser), do nothing.
2853 */
2854 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2855 if (parser_doctype) {
2856 if (!XMLParser_CheckExact(self_pyobj)) {
2857 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2858 "This method of XMLParser is deprecated. Define"
2859 " doctype() method on the TreeBuilder target.",
2860 1) < 0) {
2861 goto clear;
2862 }
2863 res = PyObject_CallFunction(parser_doctype, "OOO",
2864 doctype_name_obj, pubid_obj, sysid_obj);
2865 Py_CLEAR(res);
2866 }
2867 }
2868
2869clear:
2870 Py_XDECREF(parser_doctype);
2871 Py_DECREF(doctype_name_obj);
2872 Py_DECREF(pubid_obj);
2873 Py_DECREF(sysid_obj);
2874}
2875
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876static void
2877expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2878 const XML_Char* data_in)
2879{
2880 PyObject* target;
2881 PyObject* data;
2882 PyObject* res;
2883
2884 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002885 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2886 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887 if (target && data) {
2888 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2889 Py_XDECREF(res);
2890 Py_DECREF(data);
2891 Py_DECREF(target);
2892 } else {
2893 Py_XDECREF(data);
2894 Py_XDECREF(target);
2895 }
2896 }
2897}
2898
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899static int
2900expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2901 XML_Encoding *info)
2902{
2903 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 unsigned char s[256];
2905 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002906 void *data;
2907 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908
2909 memset(info, 0, sizeof(XML_Encoding));
2910
2911 for (i = 0; i < 256; i++)
2912 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002913
Fredrik Lundhc3389992005-12-25 11:40:19 +00002914 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915 if (!u)
2916 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002917 if (PyUnicode_READY(u))
2918 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002920 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 Py_DECREF(u);
2922 return XML_STATUS_ERROR;
2923 }
2924
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002925 kind = PyUnicode_KIND(u);
2926 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002928 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2929 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2930 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 }
2934
2935 Py_DECREF(u);
2936
2937 return XML_STATUS_OK;
2938}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939
2940/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941
Eli Bendersky52467b12012-06-01 07:13:08 +03002942static PyObject *
2943xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944{
Eli Bendersky52467b12012-06-01 07:13:08 +03002945 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2946 if (self) {
2947 self->parser = NULL;
2948 self->target = self->entity = self->names = NULL;
2949 self->handle_start = self->handle_data = self->handle_end = NULL;
2950 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002951 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002953 return (PyObject *)self;
2954}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955
Eli Bendersky52467b12012-06-01 07:13:08 +03002956static int
2957xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2958{
2959 XMLParserObject *self_xp = (XMLParserObject *)self;
2960 PyObject *target = NULL, *html = NULL;
2961 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03002962 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963
Eli Bendersky52467b12012-06-01 07:13:08 +03002964 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2965 &html, &target, &encoding)) {
2966 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002968
Eli Bendersky52467b12012-06-01 07:13:08 +03002969 self_xp->entity = PyDict_New();
2970 if (!self_xp->entity)
2971 return -1;
2972
2973 self_xp->names = PyDict_New();
2974 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002975 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03002976 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 }
2978
Eli Bendersky52467b12012-06-01 07:13:08 +03002979 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
2980 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002981 Py_CLEAR(self_xp->entity);
2982 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03002984 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 }
2986
Eli Bendersky52467b12012-06-01 07:13:08 +03002987 if (target) {
2988 Py_INCREF(target);
2989 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03002990 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002992 Py_CLEAR(self_xp->entity);
2993 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03002994 EXPAT(ParserFree)(self_xp->parser);
2995 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002997 }
2998 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999
Eli Bendersky52467b12012-06-01 07:13:08 +03003000 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3001 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3002 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3003 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3004 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3005 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003006 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007
3008 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003009
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003011 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003013 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 (XML_StartElementHandler) expat_start_handler,
3015 (XML_EndElementHandler) expat_end_handler
3016 );
3017 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003018 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 (XML_DefaultHandler) expat_default_handler
3020 );
3021 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003022 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 (XML_CharacterDataHandler) expat_data_handler
3024 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003025 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003027 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 (XML_CommentHandler) expat_comment_handler
3029 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003030 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003032 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 (XML_ProcessingInstructionHandler) expat_pi_handler
3034 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003035 EXPAT(SetStartDoctypeDeclHandler)(
3036 self_xp->parser,
3037 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3038 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003040 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3042 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043
Eli Bendersky52467b12012-06-01 07:13:08 +03003044 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045}
3046
Eli Bendersky52467b12012-06-01 07:13:08 +03003047static int
3048xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3049{
3050 Py_VISIT(self->handle_close);
3051 Py_VISIT(self->handle_pi);
3052 Py_VISIT(self->handle_comment);
3053 Py_VISIT(self->handle_end);
3054 Py_VISIT(self->handle_data);
3055 Py_VISIT(self->handle_start);
3056
3057 Py_VISIT(self->target);
3058 Py_VISIT(self->entity);
3059 Py_VISIT(self->names);
3060
3061 return 0;
3062}
3063
3064static int
3065xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066{
3067 EXPAT(ParserFree)(self->parser);
3068
Antoine Pitrouc1948842012-10-01 23:40:37 +02003069 Py_CLEAR(self->handle_close);
3070 Py_CLEAR(self->handle_pi);
3071 Py_CLEAR(self->handle_comment);
3072 Py_CLEAR(self->handle_end);
3073 Py_CLEAR(self->handle_data);
3074 Py_CLEAR(self->handle_start);
3075 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076
Antoine Pitrouc1948842012-10-01 23:40:37 +02003077 Py_CLEAR(self->target);
3078 Py_CLEAR(self->entity);
3079 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003080
Eli Bendersky52467b12012-06-01 07:13:08 +03003081 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082}
3083
Eli Bendersky52467b12012-06-01 07:13:08 +03003084static void
3085xmlparser_dealloc(XMLParserObject* self)
3086{
3087 PyObject_GC_UnTrack(self);
3088 xmlparser_gc_clear(self);
3089 Py_TYPE(self)->tp_free((PyObject *)self);
3090}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091
3092LOCAL(PyObject*)
3093expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3094{
3095 int ok;
3096
3097 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3098
3099 if (PyErr_Occurred())
3100 return NULL;
3101
3102 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003103 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003104 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003106 EXPAT(GetErrorColumnNumber)(self->parser),
3107 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108 );
3109 return NULL;
3110 }
3111
3112 Py_RETURN_NONE;
3113}
3114
3115static PyObject*
3116xmlparser_close(XMLParserObject* self, PyObject* args)
3117{
3118 /* end feeding data to parser */
3119
3120 PyObject* res;
3121 if (!PyArg_ParseTuple(args, ":close"))
3122 return NULL;
3123
3124 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003125 if (!res)
3126 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003128 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129 Py_DECREF(res);
3130 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003131 } if (self->handle_close) {
3132 Py_DECREF(res);
3133 return PyObject_CallFunction(self->handle_close, "");
3134 } else
3135 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136}
3137
3138static PyObject*
3139xmlparser_feed(XMLParserObject* self, PyObject* args)
3140{
3141 /* feed data to parser */
3142
3143 char* data;
3144 int data_len;
3145 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3146 return NULL;
3147
3148 return expat_parse(self, data, data_len, 0);
3149}
3150
3151static PyObject*
3152xmlparser_parse(XMLParserObject* self, PyObject* args)
3153{
3154 /* (internal) parse until end of input stream */
3155
3156 PyObject* reader;
3157 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003158 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159 PyObject* res;
3160
3161 PyObject* fileobj;
3162 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3163 return NULL;
3164
3165 reader = PyObject_GetAttrString(fileobj, "read");
3166 if (!reader)
3167 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003168
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 /* read from open file object */
3170 for (;;) {
3171
3172 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3173
3174 if (!buffer) {
3175 /* read failed (e.g. due to KeyboardInterrupt) */
3176 Py_DECREF(reader);
3177 return NULL;
3178 }
3179
Eli Benderskyf996e772012-03-16 05:53:30 +02003180 if (PyUnicode_CheckExact(buffer)) {
3181 /* A unicode object is encoded into bytes using UTF-8 */
3182 if (PyUnicode_GET_SIZE(buffer) == 0) {
3183 Py_DECREF(buffer);
3184 break;
3185 }
3186 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003187 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003188 if (!temp) {
3189 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003190 Py_DECREF(reader);
3191 return NULL;
3192 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003193 buffer = temp;
3194 }
3195 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196 Py_DECREF(buffer);
3197 break;
3198 }
3199
3200 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003201 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 );
3203
3204 Py_DECREF(buffer);
3205
3206 if (!res) {
3207 Py_DECREF(reader);
3208 return NULL;
3209 }
3210 Py_DECREF(res);
3211
3212 }
3213
3214 Py_DECREF(reader);
3215
3216 res = expat_parse(self, "", 0, 1);
3217
3218 if (res && TreeBuilder_CheckExact(self->target)) {
3219 Py_DECREF(res);
3220 return treebuilder_done((TreeBuilderObject*) self->target);
3221 }
3222
3223 return res;
3224}
3225
3226static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003227xmlparser_doctype(XMLParserObject *self, PyObject *args)
3228{
3229 Py_RETURN_NONE;
3230}
3231
3232static PyObject*
3233xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234{
3235 /* activate element event reporting */
3236
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003237 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238 TreeBuilderObject* target;
3239
3240 PyObject* events; /* event collector */
3241 PyObject* event_set = Py_None;
3242 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3243 &event_set))
3244 return NULL;
3245
3246 if (!TreeBuilder_CheckExact(self->target)) {
3247 PyErr_SetString(
3248 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003249 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 "targets"
3251 );
3252 return NULL;
3253 }
3254
3255 target = (TreeBuilderObject*) self->target;
3256
3257 Py_INCREF(events);
3258 Py_XDECREF(target->events);
3259 target->events = events;
3260
3261 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003262 Py_CLEAR(target->start_event_obj);
3263 Py_CLEAR(target->end_event_obj);
3264 Py_CLEAR(target->start_ns_event_obj);
3265 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266
3267 if (event_set == Py_None) {
3268 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003269 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 Py_RETURN_NONE;
3271 }
3272
3273 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3274 goto error;
3275
3276 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3277 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3278 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003279 if (PyUnicode_Check(item)) {
3280 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003281 if (event == NULL)
3282 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003283 } else if (PyBytes_Check(item))
3284 event = PyBytes_AS_STRING(item);
3285 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003287 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 if (strcmp(event, "start") == 0) {
3289 Py_INCREF(item);
3290 target->start_event_obj = item;
3291 } else if (strcmp(event, "end") == 0) {
3292 Py_INCREF(item);
3293 Py_XDECREF(target->end_event_obj);
3294 target->end_event_obj = item;
3295 } else if (strcmp(event, "start-ns") == 0) {
3296 Py_INCREF(item);
3297 Py_XDECREF(target->start_ns_event_obj);
3298 target->start_ns_event_obj = item;
3299 EXPAT(SetNamespaceDeclHandler)(
3300 self->parser,
3301 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3302 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3303 );
3304 } else if (strcmp(event, "end-ns") == 0) {
3305 Py_INCREF(item);
3306 Py_XDECREF(target->end_ns_event_obj);
3307 target->end_ns_event_obj = item;
3308 EXPAT(SetNamespaceDeclHandler)(
3309 self->parser,
3310 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3311 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3312 );
3313 } else {
3314 PyErr_Format(
3315 PyExc_ValueError,
3316 "unknown event '%s'", event
3317 );
3318 return NULL;
3319 }
3320 }
3321
3322 Py_RETURN_NONE;
3323
3324 error:
3325 PyErr_SetString(
3326 PyExc_TypeError,
3327 "invalid event tuple"
3328 );
3329 return NULL;
3330}
3331
3332static PyMethodDef xmlparser_methods[] = {
3333 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3334 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3335 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3336 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003337 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 {NULL, NULL}
3339};
3340
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003341static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003342xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003344 if (PyUnicode_Check(nameobj)) {
3345 PyObject* res;
3346 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3347 res = self->entity;
3348 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3349 res = self->target;
3350 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3351 return PyUnicode_FromFormat(
3352 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003354 }
3355 else
3356 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357
Alexander Belopolskye239d232010-12-08 23:31:48 +00003358 Py_INCREF(res);
3359 return res;
3360 }
3361 generic:
3362 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363}
3364
Neal Norwitz227b5332006-03-22 09:28:35 +00003365static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003366 PyVarObject_HEAD_INIT(NULL, 0)
3367 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003369 (destructor)xmlparser_dealloc, /* tp_dealloc */
3370 0, /* tp_print */
3371 0, /* tp_getattr */
3372 0, /* tp_setattr */
3373 0, /* tp_reserved */
3374 0, /* tp_repr */
3375 0, /* tp_as_number */
3376 0, /* tp_as_sequence */
3377 0, /* tp_as_mapping */
3378 0, /* tp_hash */
3379 0, /* tp_call */
3380 0, /* tp_str */
3381 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3382 0, /* tp_setattro */
3383 0, /* tp_as_buffer */
3384 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3385 /* tp_flags */
3386 0, /* tp_doc */
3387 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3388 (inquiry)xmlparser_gc_clear, /* tp_clear */
3389 0, /* tp_richcompare */
3390 0, /* tp_weaklistoffset */
3391 0, /* tp_iter */
3392 0, /* tp_iternext */
3393 xmlparser_methods, /* tp_methods */
3394 0, /* tp_members */
3395 0, /* tp_getset */
3396 0, /* tp_base */
3397 0, /* tp_dict */
3398 0, /* tp_descr_get */
3399 0, /* tp_descr_set */
3400 0, /* tp_dictoffset */
3401 (initproc)xmlparser_init, /* tp_init */
3402 PyType_GenericAlloc, /* tp_alloc */
3403 xmlparser_new, /* tp_new */
3404 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405};
3406
3407#endif
3408
3409/* ==================================================================== */
3410/* python module interface */
3411
3412static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414 {NULL, NULL}
3415};
3416
Martin v. Löwis1a214512008-06-11 05:26:20 +00003417
3418static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003419 PyModuleDef_HEAD_INIT,
3420 "_elementtree",
3421 NULL,
3422 -1,
3423 _functions,
3424 NULL,
3425 NULL,
3426 NULL,
3427 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003428};
3429
Neal Norwitzf6657e62006-12-28 04:47:50 +00003430PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003431PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003433 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003435 /* Initialize object types */
3436 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003437 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003438 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003439 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003440#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003441 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003442 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443#endif
3444
Martin v. Löwis1a214512008-06-11 05:26:20 +00003445 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003446 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003447 return NULL;
3448
Eli Bendersky828efde2012-04-05 05:40:58 +03003449 if (!(temp = PyImport_ImportModule("copy")))
3450 return NULL;
3451 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3452 Py_XDECREF(temp);
3453
3454 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3455 return NULL;
3456
Eli Bendersky20d41742012-06-01 09:48:37 +03003457 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003458 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3459 if (expat_capi) {
3460 /* check that it's usable */
3461 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3462 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3463 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3464 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003465 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003466 PyErr_SetString(PyExc_ImportError,
3467 "pyexpat version is incompatible");
3468 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003469 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003470 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003471 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003472 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003474 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003475 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003476 );
3477 Py_INCREF(elementtree_parseerror_obj);
3478 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3479
Eli Bendersky092af1f2012-03-04 07:14:03 +02003480 Py_INCREF((PyObject *)&Element_Type);
3481 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3482
Eli Bendersky58d548d2012-05-29 15:45:16 +03003483 Py_INCREF((PyObject *)&TreeBuilder_Type);
3484 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3485
Eli Bendersky52467b12012-06-01 07:13:08 +03003486#if defined(USE_EXPAT)
3487 Py_INCREF((PyObject *)&XMLParser_Type);
3488 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3489#endif
3490
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003491 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492}