blob: 9f302f916b071a9e9bdb5f482ed7ce6b96a7a92d [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
/* Version string of this accelerator. */
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave this defined to include the expat-based XMLParser type. */
#define USE_EXPAT

/* Number of child slots embedded in an element's extra block; an element
   with at most this many children needs no separate child buffer. */
#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Optional allocation tracing.  Change the condition to 1 to keep a running
   byte count and print it on every ALLOC/RELEASE; otherwise both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`; under MSVC it additionally requests inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
94
/* The lowest bit of a text/tail pointer is used to store a 'join' flag, so
   every use of text and tail as object pointers must go through JOIN_OBJ.
   JOIN_OBJ strips the flag bit, JOIN_GET reads it, and JOIN_SET yields the
   pointer with its flag bit cleared and then or'ed with `flag`.  See the
   comments in the ElementObject definition for more info. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
Antoine Pitrouc1948842012-10-01 23:40:37 +0200126 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000129 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000131 return result;
132}
133
134LOCAL(PyObject*)
135list_join(PyObject* list)
136{
137 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 PyObject* result;
140
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 if (!joiner)
143 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200144 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200146 if (result)
147 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 return result;
149}
150
Eli Bendersky48d358b2012-05-30 17:57:50 +0300151/* Is the given object an empty dictionary?
152*/
153static int
154is_empty_dict(PyObject *obj)
155{
156 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
157}
158
159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200161/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162
163typedef struct {
164
165 /* attributes (a dictionary object), or None if no attributes */
166 PyObject* attrib;
167
168 /* child elements */
169 int length; /* actual number of items */
170 int allocated; /* allocated items */
171
172 /* this either points to _children or to a malloced buffer */
173 PyObject* *children;
174
175 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000177} ElementObjectExtra;
178
179typedef struct {
180 PyObject_HEAD
181
182 /* element tag (a string). */
183 PyObject* tag;
184
185 /* text before first child. note that this is a tagged pointer;
186 use JOIN_OBJ to get the object pointer. the join flag is used
187 to distinguish lists created by the tree builder from lists
188 assigned to the attribute by application code; the former
189 should be joined before being returned to the user, the latter
190 should be left intact. */
191 PyObject* text;
192
193 /* text after this element, in parent. note that this is a tagged
194 pointer; use JOIN_OBJ to get the object pointer. */
195 PyObject* tail;
196
197 ElementObjectExtra* extra;
198
Eli Benderskyebf37a22012-04-03 22:02:37 +0300199 PyObject *weakreflist; /* For tp_weaklistoffset */
200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000201} ElementObject;
202
Neal Norwitz227b5332006-03-22 09:28:35 +0000203static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000204
Christian Heimes90aa7642007-12-19 02:45:37 +0000205#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206
207/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200208/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212{
213 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
214 if (!self->extra)
215 return -1;
216
217 if (!attrib)
218 attrib = Py_None;
219
220 Py_INCREF(attrib);
221 self->extra->attrib = attrib;
222
223 self->extra->length = 0;
224 self->extra->allocated = STATIC_CHILDREN;
225 self->extra->children = self->extra->_children;
226
227 return 0;
228}
229
230LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
Eli Bendersky08b85292012-04-04 15:55:07 +0300233 ElementObjectExtra *myextra;
234 int i;
235
Eli Benderskyebf37a22012-04-03 22:02:37 +0300236 if (!self->extra)
237 return;
238
239 /* Avoid DECREFs calling into this code again (cycles, etc.)
240 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300241 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 self->extra = NULL;
243
244 Py_DECREF(myextra->attrib);
245
Eli Benderskyebf37a22012-04-03 22:02:37 +0300246 for (i = 0; i < myextra->length; i++)
247 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 if (myextra->children != myextra->_children)
250 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253}
254
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255/* Convenience internal function to create new Element objects with the given
256 * tag and attributes.
257*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200259create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260{
261 ElementObject* self;
262
Eli Bendersky0192ba32012-03-30 16:38:33 +0300263 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 if (self == NULL)
265 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 self->extra = NULL;
267
Eli Bendersky48d358b2012-05-30 17:57:50 +0300268 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 }
274
275 Py_INCREF(tag);
276 self->tag = tag;
277
278 Py_INCREF(Py_None);
279 self->text = Py_None;
280
281 Py_INCREF(Py_None);
282 self->tail = Py_None;
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 self->weakreflist = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 return (PyObject*) self;
289}
290
Eli Bendersky092af1f2012-03-04 07:14:03 +0200291static PyObject *
292element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
293{
294 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
295 if (e != NULL) {
296 Py_INCREF(Py_None);
297 e->tag = Py_None;
298
299 Py_INCREF(Py_None);
300 e->text = Py_None;
301
302 Py_INCREF(Py_None);
303 e->tail = Py_None;
304
305 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300306 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 }
308 return (PyObject *)e;
309}
310
Eli Bendersky737b1732012-05-29 06:02:56 +0300311/* Helper function for extracting the attrib dictionary from a keywords dict.
312 * This is required by some constructors/functions in this module that can
313 * either accept attrib as a keyword argument or all attributes splashed
314 * directly into *kwds.
315 * If there is no 'attrib' keyword, return an empty dict.
316 */
317static PyObject*
318get_attrib_from_keywords(PyObject *kwds)
319{
320 PyObject *attrib_str = PyUnicode_FromString("attrib");
321 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
322
323 if (attrib) {
324 /* If attrib was found in kwds, copy its value and remove it from
325 * kwds
326 */
327 if (!PyDict_Check(attrib)) {
328 Py_DECREF(attrib_str);
329 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
330 Py_TYPE(attrib)->tp_name);
331 return NULL;
332 }
333 attrib = PyDict_Copy(attrib);
334 PyDict_DelItem(kwds, attrib_str);
335 } else {
336 attrib = PyDict_New();
337 }
338
339 Py_DECREF(attrib_str);
340
341 if (attrib)
342 PyDict_Update(attrib, kwds);
343 return attrib;
344}
345
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346static int
347element_init(PyObject *self, PyObject *args, PyObject *kwds)
348{
349 PyObject *tag;
350 PyObject *tmp;
351 PyObject *attrib = NULL;
352 ElementObject *self_elem;
353
354 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
355 return -1;
356
Eli Bendersky737b1732012-05-29 06:02:56 +0300357 if (attrib) {
358 /* attrib passed as positional arg */
359 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200360 if (!attrib)
361 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300362 if (kwds) {
363 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200364 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300365 return -1;
366 }
367 }
368 } else if (kwds) {
369 /* have keywords args */
370 attrib = get_attrib_from_keywords(kwds);
371 if (!attrib)
372 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373 }
374
375 self_elem = (ElementObject *)self;
376
Antoine Pitrouc1948842012-10-01 23:40:37 +0200377 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380 return -1;
381 }
382 }
383
Eli Bendersky48d358b2012-05-30 17:57:50 +0300384 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200385 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200386
387 /* Replace the objects already pointed to by tag, text and tail. */
388 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 Py_DECREF(tmp);
392
393 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200395 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200396 Py_DECREF(JOIN_OBJ(tmp));
397
398 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_DECREF(JOIN_OBJ(tmp));
402
403 return 0;
404}
405
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000406LOCAL(int)
407element_resize(ElementObject* self, int extra)
408{
409 int size;
410 PyObject* *children;
411
412 /* make sure self->children can hold the given number of extra
413 elements. set an exception and return -1 if allocation failed */
414
415 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417
418 size = self->extra->length + extra;
419
420 if (size > self->extra->allocated) {
421 /* use Python 2.4's list growth strategy */
422 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000423 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100424 * which needs at least 4 bytes.
425 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000426 * be safe.
427 */
428 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000430 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100431 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 * false alarm always assume at least one child to be safe.
433 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 children = PyObject_Realloc(self->extra->children,
435 size * sizeof(PyObject*));
436 if (!children)
437 goto nomemory;
438 } else {
439 children = PyObject_Malloc(size * sizeof(PyObject*));
440 if (!children)
441 goto nomemory;
442 /* copy existing children from static area to malloc buffer */
443 memcpy(children, self->extra->children,
444 self->extra->length * sizeof(PyObject*));
445 }
446 self->extra->children = children;
447 self->extra->allocated = size;
448 }
449
450 return 0;
451
452 nomemory:
453 PyErr_NoMemory();
454 return -1;
455}
456
457LOCAL(int)
458element_add_subelement(ElementObject* self, PyObject* element)
459{
460 /* add a child element to a parent */
461
462 if (element_resize(self, 1) < 0)
463 return -1;
464
465 Py_INCREF(element);
466 self->extra->children[self->extra->length] = element;
467
468 self->extra->length++;
469
470 return 0;
471}
472
473LOCAL(PyObject*)
474element_get_attrib(ElementObject* self)
475{
476 /* return borrowed reference to attrib dictionary */
477 /* note: this function assumes that the extra section exists */
478
479 PyObject* res = self->extra->attrib;
480
481 if (res == Py_None) {
482 /* create missing dictionary */
483 res = PyDict_New();
484 if (!res)
485 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200486 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000487 self->extra->attrib = res;
488 }
489
490 return res;
491}
492
493LOCAL(PyObject*)
494element_get_text(ElementObject* self)
495{
496 /* return borrowed reference to text attribute */
497
498 PyObject* res = self->text;
499
500 if (JOIN_GET(res)) {
501 res = JOIN_OBJ(res);
502 if (PyList_CheckExact(res)) {
503 res = list_join(res);
504 if (!res)
505 return NULL;
506 self->text = res;
507 }
508 }
509
510 return res;
511}
512
513LOCAL(PyObject*)
514element_get_tail(ElementObject* self)
515{
516 /* return borrowed reference to text attribute */
517
518 PyObject* res = self->tail;
519
520 if (JOIN_GET(res)) {
521 res = JOIN_OBJ(res);
522 if (PyList_CheckExact(res)) {
523 res = list_join(res);
524 if (!res)
525 return NULL;
526 self->tail = res;
527 }
528 }
529
530 return res;
531}
532
533static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300534subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535{
536 PyObject* elem;
537
538 ElementObject* parent;
539 PyObject* tag;
540 PyObject* attrib = NULL;
541 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
542 &Element_Type, &parent, &tag,
543 &PyDict_Type, &attrib))
544 return NULL;
545
Eli Bendersky737b1732012-05-29 06:02:56 +0300546 if (attrib) {
547 /* attrib passed as positional arg */
548 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000549 if (!attrib)
550 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300551 if (kwds) {
552 if (PyDict_Update(attrib, kwds) < 0) {
553 return NULL;
554 }
555 }
556 } else if (kwds) {
557 /* have keyword args */
558 attrib = get_attrib_from_keywords(kwds);
559 if (!attrib)
560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 Py_INCREF(Py_None);
564 attrib = Py_None;
565 }
566
Eli Bendersky092af1f2012-03-04 07:14:03 +0200567 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568
569 Py_DECREF(attrib);
570
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000571 if (element_add_subelement(parent, elem) < 0) {
572 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575
576 return elem;
577}
578
Eli Bendersky0192ba32012-03-30 16:38:33 +0300579static int
580element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
581{
582 Py_VISIT(self->tag);
583 Py_VISIT(JOIN_OBJ(self->text));
584 Py_VISIT(JOIN_OBJ(self->tail));
585
586 if (self->extra) {
587 int i;
588 Py_VISIT(self->extra->attrib);
589
590 for (i = 0; i < self->extra->length; ++i)
591 Py_VISIT(self->extra->children[i]);
592 }
593 return 0;
594}
595
596static int
597element_gc_clear(ElementObject *self)
598{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300599 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300600
601 /* The following is like Py_CLEAR for self->text and self->tail, but
602 * written explicitily because the real pointers hide behind access
603 * macros.
604 */
605 if (self->text) {
606 PyObject *tmp = JOIN_OBJ(self->text);
607 self->text = NULL;
608 Py_DECREF(tmp);
609 }
610
611 if (self->tail) {
612 PyObject *tmp = JOIN_OBJ(self->tail);
613 self->tail = NULL;
614 Py_DECREF(tmp);
615 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616
617 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300618 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300620 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 return 0;
622}
623
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624static void
625element_dealloc(ElementObject* self)
626{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300628
629 if (self->weakreflist != NULL)
630 PyObject_ClearWeakRefs((PyObject *) self);
631
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 /* element_gc_clear clears all references and deallocates extra
633 */
634 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000635
636 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200637 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638}
639
640/* -------------------------------------------------------------------- */
641/* methods (in alphabetical order) */
642
643static PyObject*
644element_append(ElementObject* self, PyObject* args)
645{
646 PyObject* element;
647 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
648 return NULL;
649
650 if (element_add_subelement(self, element) < 0)
651 return NULL;
652
653 Py_RETURN_NONE;
654}
655
656static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658{
659 if (!PyArg_ParseTuple(args, ":clear"))
660 return NULL;
661
Eli Benderskyebf37a22012-04-03 22:02:37 +0300662 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 Py_INCREF(Py_None);
665 Py_DECREF(JOIN_OBJ(self->text));
666 self->text = Py_None;
667
668 Py_INCREF(Py_None);
669 Py_DECREF(JOIN_OBJ(self->tail));
670 self->tail = Py_None;
671
672 Py_RETURN_NONE;
673}
674
675static PyObject*
676element_copy(ElementObject* self, PyObject* args)
677{
678 int i;
679 ElementObject* element;
680
681 if (!PyArg_ParseTuple(args, ":__copy__"))
682 return NULL;
683
Eli Bendersky092af1f2012-03-04 07:14:03 +0200684 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685 self->tag, (self->extra) ? self->extra->attrib : Py_None
686 );
687 if (!element)
688 return NULL;
689
690 Py_DECREF(JOIN_OBJ(element->text));
691 element->text = self->text;
692 Py_INCREF(JOIN_OBJ(element->text));
693
694 Py_DECREF(JOIN_OBJ(element->tail));
695 element->tail = self->tail;
696 Py_INCREF(JOIN_OBJ(element->tail));
697
698 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100699
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000700 if (element_resize(element, self->extra->length) < 0) {
701 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000703 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 for (i = 0; i < self->extra->length; i++) {
706 Py_INCREF(self->extra->children[i]);
707 element->extra->children[i] = self->extra->children[i];
708 }
709
710 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712 }
713
714 return (PyObject*) element;
715}
716
717static PyObject*
718element_deepcopy(ElementObject* self, PyObject* args)
719{
720 int i;
721 ElementObject* element;
722 PyObject* tag;
723 PyObject* attrib;
724 PyObject* text;
725 PyObject* tail;
726 PyObject* id;
727
728 PyObject* memo;
729 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
730 return NULL;
731
732 tag = deepcopy(self->tag, memo);
733 if (!tag)
734 return NULL;
735
736 if (self->extra) {
737 attrib = deepcopy(self->extra->attrib, memo);
738 if (!attrib) {
739 Py_DECREF(tag);
740 return NULL;
741 }
742 } else {
743 Py_INCREF(Py_None);
744 attrib = Py_None;
745 }
746
Eli Bendersky092af1f2012-03-04 07:14:03 +0200747 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 Py_DECREF(tag);
750 Py_DECREF(attrib);
751
752 if (!element)
753 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 text = deepcopy(JOIN_OBJ(self->text), memo);
756 if (!text)
757 goto error;
758 Py_DECREF(element->text);
759 element->text = JOIN_SET(text, JOIN_GET(self->text));
760
761 tail = deepcopy(JOIN_OBJ(self->tail), memo);
762 if (!tail)
763 goto error;
764 Py_DECREF(element->tail);
765 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
766
767 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769 if (element_resize(element, self->extra->length) < 0)
770 goto error;
771
772 for (i = 0; i < self->extra->length; i++) {
773 PyObject* child = deepcopy(self->extra->children[i], memo);
774 if (!child) {
775 element->extra->length = i;
776 goto error;
777 }
778 element->extra->children[i] = child;
779 }
780
781 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 }
784
785 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200786 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000787 if (!id)
788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 i = PyDict_SetItem(memo, id, (PyObject*) element);
791
792 Py_DECREF(id);
793
794 if (i < 0)
795 goto error;
796
797 return (PyObject*) element;
798
799 error:
800 Py_DECREF(element);
801 return NULL;
802}
803
Martin v. Löwisbce16662012-06-17 10:41:22 +0200804static PyObject*
805element_sizeof(PyObject* _self, PyObject* args)
806{
807 ElementObject *self = (ElementObject*)_self;
808 Py_ssize_t result = sizeof(ElementObject);
809 if (self->extra) {
810 result += sizeof(ElementObjectExtra);
811 if (self->extra->children != self->extra->_children)
812 result += sizeof(PyObject*) * self->extra->allocated;
813 }
814 return PyLong_FromSsize_t(result);
815}
816
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817LOCAL(int)
818checkpath(PyObject* tag)
819{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000820 Py_ssize_t i;
821 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822
823 /* check if a tag contains an xpath character */
824
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000825#define PATHCHAR(ch) \
826 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000827
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200829 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
830 void *data = PyUnicode_DATA(tag);
831 unsigned int kind = PyUnicode_KIND(tag);
832 for (i = 0; i < len; i++) {
833 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
834 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000835 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200836 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200838 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 return 1;
840 }
841 return 0;
842 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 if (PyBytes_Check(tag)) {
844 char *p = PyBytes_AS_STRING(tag);
845 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846 if (p[i] == '{')
847 check = 0;
848 else if (p[i] == '}')
849 check = 1;
850 else if (check && PATHCHAR(p[i]))
851 return 1;
852 }
853 return 0;
854 }
855
856 return 1; /* unknown type; might be path expression */
857}
858
859static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000860element_extend(ElementObject* self, PyObject* args)
861{
862 PyObject* seq;
863 Py_ssize_t i, seqlen = 0;
864
865 PyObject* seq_in;
866 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
867 return NULL;
868
869 seq = PySequence_Fast(seq_in, "");
870 if (!seq) {
871 PyErr_Format(
872 PyExc_TypeError,
873 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
874 );
875 return NULL;
876 }
877
878 seqlen = PySequence_Size(seq);
879 for (i = 0; i < seqlen; i++) {
880 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200881 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
882 Py_DECREF(seq);
883 PyErr_Format(
884 PyExc_TypeError,
885 "expected an Element, not \"%.200s\"",
886 Py_TYPE(element)->tp_name);
887 return NULL;
888 }
889
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000890 if (element_add_subelement(self, element) < 0) {
891 Py_DECREF(seq);
892 return NULL;
893 }
894 }
895
896 Py_DECREF(seq);
897
898 Py_RETURN_NONE;
899}
900
901static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300902element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000903{
904 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000905 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000906 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300907 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200908
Eli Bendersky737b1732012-05-29 06:02:56 +0300909 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
910 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000911 return NULL;
912
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200913 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200914 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200915 return _PyObject_CallMethodId(
916 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000917 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200918 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000919
920 if (!self->extra)
921 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100922
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000923 for (i = 0; i < self->extra->length; i++) {
924 PyObject* item = self->extra->children[i];
925 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000926 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000927 Py_INCREF(item);
928 return item;
929 }
930 }
931
932 Py_RETURN_NONE;
933}
934
935static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300936element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000937{
938 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 PyObject* tag;
940 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000941 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200942 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300943 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200944
Eli Bendersky737b1732012-05-29 06:02:56 +0300945 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
946 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947 return NULL;
948
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000949 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200950 return _PyObject_CallMethodId(
951 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952 );
953
954 if (!self->extra) {
955 Py_INCREF(default_value);
956 return default_value;
957 }
958
959 for (i = 0; i < self->extra->length; i++) {
960 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000961 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
962
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000963 PyObject* text = element_get_text(item);
964 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000965 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000966 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 return text;
968 }
969 }
970
971 Py_INCREF(default_value);
972 return default_value;
973}
974
975static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300976element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000977{
978 int i;
979 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000981 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300982 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983
Eli Bendersky737b1732012-05-29 06:02:56 +0300984 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
985 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000986 return NULL;
987
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200988 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200989 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200990 return _PyObject_CallMethodId(
991 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000992 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200993 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000994
995 out = PyList_New(0);
996 if (!out)
997 return NULL;
998
999 if (!self->extra)
1000 return out;
1001
1002 for (i = 0; i < self->extra->length; i++) {
1003 PyObject* item = self->extra->children[i];
1004 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001005 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001006 if (PyList_Append(out, item) < 0) {
1007 Py_DECREF(out);
1008 return NULL;
1009 }
1010 }
1011 }
1012
1013 return out;
1014}
1015
1016static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001017element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001018{
1019 PyObject* tag;
1020 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001021 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001022 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001023
Eli Bendersky737b1732012-05-29 06:02:56 +03001024 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1025 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001026 return NULL;
1027
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001028 return _PyObject_CallMethodId(
1029 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001030 );
1031}
1032
1033static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001034element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035{
1036 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001037 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001038
1039 PyObject* key;
1040 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001041
1042 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1043 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001044 return NULL;
1045
1046 if (!self->extra || self->extra->attrib == Py_None)
1047 value = default_value;
1048 else {
1049 value = PyDict_GetItem(self->extra->attrib, key);
1050 if (!value)
1051 value = default_value;
1052 }
1053
1054 Py_INCREF(value);
1055 return value;
1056}
1057
1058static PyObject*
1059element_getchildren(ElementObject* self, PyObject* args)
1060{
1061 int i;
1062 PyObject* list;
1063
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001064 /* FIXME: report as deprecated? */
1065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066 if (!PyArg_ParseTuple(args, ":getchildren"))
1067 return NULL;
1068
1069 if (!self->extra)
1070 return PyList_New(0);
1071
1072 list = PyList_New(self->extra->length);
1073 if (!list)
1074 return NULL;
1075
1076 for (i = 0; i < self->extra->length; i++) {
1077 PyObject* item = self->extra->children[i];
1078 Py_INCREF(item);
1079 PyList_SET_ITEM(list, i, item);
1080 }
1081
1082 return list;
1083}
1084
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001085
Eli Bendersky64d11e62012-06-15 07:42:50 +03001086static PyObject *
1087create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1088
1089
1090static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001091element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001092{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001094 static char* kwlist[] = {"tag", 0};
1095
1096 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 return NULL;
1098
Eli Bendersky64d11e62012-06-15 07:42:50 +03001099 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100}
1101
1102
1103static PyObject*
1104element_itertext(ElementObject* self, PyObject* args)
1105{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001106 if (!PyArg_ParseTuple(args, ":itertext"))
1107 return NULL;
1108
Eli Bendersky64d11e62012-06-15 07:42:50 +03001109 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001110}
1111
Eli Bendersky64d11e62012-06-15 07:42:50 +03001112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001114element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001115{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001116 ElementObject* self = (ElementObject*) self_;
1117
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118 if (!self->extra || index < 0 || index >= self->extra->length) {
1119 PyErr_SetString(
1120 PyExc_IndexError,
1121 "child index out of range"
1122 );
1123 return NULL;
1124 }
1125
1126 Py_INCREF(self->extra->children[index]);
1127 return self->extra->children[index];
1128}
1129
1130static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001131element_insert(ElementObject* self, PyObject* args)
1132{
1133 int i;
1134
1135 int index;
1136 PyObject* element;
1137 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1138 &Element_Type, &element))
1139 return NULL;
1140
1141 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001142 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001143
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001144 if (index < 0) {
1145 index += self->extra->length;
1146 if (index < 0)
1147 index = 0;
1148 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 if (index > self->extra->length)
1150 index = self->extra->length;
1151
1152 if (element_resize(self, 1) < 0)
1153 return NULL;
1154
1155 for (i = self->extra->length; i > index; i--)
1156 self->extra->children[i] = self->extra->children[i-1];
1157
1158 Py_INCREF(element);
1159 self->extra->children[index] = element;
1160
1161 self->extra->length++;
1162
1163 Py_RETURN_NONE;
1164}
1165
1166static PyObject*
1167element_items(ElementObject* self, PyObject* args)
1168{
1169 if (!PyArg_ParseTuple(args, ":items"))
1170 return NULL;
1171
1172 if (!self->extra || self->extra->attrib == Py_None)
1173 return PyList_New(0);
1174
1175 return PyDict_Items(self->extra->attrib);
1176}
1177
1178static PyObject*
1179element_keys(ElementObject* self, PyObject* args)
1180{
1181 if (!PyArg_ParseTuple(args, ":keys"))
1182 return NULL;
1183
1184 if (!self->extra || self->extra->attrib == Py_None)
1185 return PyList_New(0);
1186
1187 return PyDict_Keys(self->extra->attrib);
1188}
1189
Martin v. Löwis18e16552006-02-15 17:27:45 +00001190static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191element_length(ElementObject* self)
1192{
1193 if (!self->extra)
1194 return 0;
1195
1196 return self->extra->length;
1197}
1198
1199static PyObject*
1200element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1201{
1202 PyObject* elem;
1203
1204 PyObject* tag;
1205 PyObject* attrib;
1206 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1207 return NULL;
1208
1209 attrib = PyDict_Copy(attrib);
1210 if (!attrib)
1211 return NULL;
1212
Eli Bendersky092af1f2012-03-04 07:14:03 +02001213 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214
1215 Py_DECREF(attrib);
1216
1217 return elem;
1218}
1219
1220static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221element_remove(ElementObject* self, PyObject* args)
1222{
1223 int i;
1224
1225 PyObject* element;
1226 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1227 return NULL;
1228
1229 if (!self->extra) {
1230 /* element has no children, so raise exception */
1231 PyErr_SetString(
1232 PyExc_ValueError,
1233 "list.remove(x): x not in list"
1234 );
1235 return NULL;
1236 }
1237
1238 for (i = 0; i < self->extra->length; i++) {
1239 if (self->extra->children[i] == element)
1240 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001241 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 break;
1243 }
1244
1245 if (i == self->extra->length) {
1246 /* element is not in children, so raise exception */
1247 PyErr_SetString(
1248 PyExc_ValueError,
1249 "list.remove(x): x not in list"
1250 );
1251 return NULL;
1252 }
1253
1254 Py_DECREF(self->extra->children[i]);
1255
1256 self->extra->length--;
1257
1258 for (; i < self->extra->length; i++)
1259 self->extra->children[i] = self->extra->children[i+1];
1260
1261 Py_RETURN_NONE;
1262}
1263
1264static PyObject*
1265element_repr(ElementObject* self)
1266{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001267 if (self->tag)
1268 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1269 else
1270 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271}
1272
1273static PyObject*
1274element_set(ElementObject* self, PyObject* args)
1275{
1276 PyObject* attrib;
1277
1278 PyObject* key;
1279 PyObject* value;
1280 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1281 return NULL;
1282
1283 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001284 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285
1286 attrib = element_get_attrib(self);
1287 if (!attrib)
1288 return NULL;
1289
1290 if (PyDict_SetItem(attrib, key, value) < 0)
1291 return NULL;
1292
1293 Py_RETURN_NONE;
1294}
1295
1296static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001297element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001299 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 int i;
1301 PyObject* old;
1302
1303 if (!self->extra || index < 0 || index >= self->extra->length) {
1304 PyErr_SetString(
1305 PyExc_IndexError,
1306 "child assignment index out of range");
1307 return -1;
1308 }
1309
1310 old = self->extra->children[index];
1311
1312 if (item) {
1313 Py_INCREF(item);
1314 self->extra->children[index] = item;
1315 } else {
1316 self->extra->length--;
1317 for (i = index; i < self->extra->length; i++)
1318 self->extra->children[i] = self->extra->children[i+1];
1319 }
1320
1321 Py_DECREF(old);
1322
1323 return 0;
1324}
1325
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326static PyObject*
1327element_subscr(PyObject* self_, PyObject* item)
1328{
1329 ElementObject* self = (ElementObject*) self_;
1330
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001331 if (PyIndex_Check(item)) {
1332 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001333
1334 if (i == -1 && PyErr_Occurred()) {
1335 return NULL;
1336 }
1337 if (i < 0 && self->extra)
1338 i += self->extra->length;
1339 return element_getitem(self_, i);
1340 }
1341 else if (PySlice_Check(item)) {
1342 Py_ssize_t start, stop, step, slicelen, cur, i;
1343 PyObject* list;
1344
1345 if (!self->extra)
1346 return PyList_New(0);
1347
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001348 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001349 self->extra->length,
1350 &start, &stop, &step, &slicelen) < 0) {
1351 return NULL;
1352 }
1353
1354 if (slicelen <= 0)
1355 return PyList_New(0);
1356 else {
1357 list = PyList_New(slicelen);
1358 if (!list)
1359 return NULL;
1360
1361 for (cur = start, i = 0; i < slicelen;
1362 cur += step, i++) {
1363 PyObject* item = self->extra->children[cur];
1364 Py_INCREF(item);
1365 PyList_SET_ITEM(list, i, item);
1366 }
1367
1368 return list;
1369 }
1370 }
1371 else {
1372 PyErr_SetString(PyExc_TypeError,
1373 "element indices must be integers");
1374 return NULL;
1375 }
1376}
1377
1378static int
1379element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1380{
1381 ElementObject* self = (ElementObject*) self_;
1382
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001383 if (PyIndex_Check(item)) {
1384 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001385
1386 if (i == -1 && PyErr_Occurred()) {
1387 return -1;
1388 }
1389 if (i < 0 && self->extra)
1390 i += self->extra->length;
1391 return element_setitem(self_, i, value);
1392 }
1393 else if (PySlice_Check(item)) {
1394 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1395
1396 PyObject* recycle = NULL;
1397 PyObject* seq = NULL;
1398
1399 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001400 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001401
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001402 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001403 self->extra->length,
1404 &start, &stop, &step, &slicelen) < 0) {
1405 return -1;
1406 }
1407
Eli Bendersky865756a2012-03-09 13:38:15 +02001408 if (value == NULL) {
1409 /* Delete slice */
1410 size_t cur;
1411 Py_ssize_t i;
1412
1413 if (slicelen <= 0)
1414 return 0;
1415
1416 /* Since we're deleting, the direction of the range doesn't matter,
1417 * so for simplicity make it always ascending.
1418 */
1419 if (step < 0) {
1420 stop = start + 1;
1421 start = stop + step * (slicelen - 1) - 1;
1422 step = -step;
1423 }
1424
1425 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1426
1427 /* recycle is a list that will contain all the children
1428 * scheduled for removal.
1429 */
1430 if (!(recycle = PyList_New(slicelen))) {
1431 PyErr_NoMemory();
1432 return -1;
1433 }
1434
1435 /* This loop walks over all the children that have to be deleted,
1436 * with cur pointing at them. num_moved is the amount of children
1437 * until the next deleted child that have to be "shifted down" to
1438 * occupy the deleted's places.
1439 * Note that in the ith iteration, shifting is done i+i places down
1440 * because i children were already removed.
1441 */
1442 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1443 /* Compute how many children have to be moved, clipping at the
1444 * list end.
1445 */
1446 Py_ssize_t num_moved = step - 1;
1447 if (cur + step >= (size_t)self->extra->length) {
1448 num_moved = self->extra->length - cur - 1;
1449 }
1450
1451 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1452
1453 memmove(
1454 self->extra->children + cur - i,
1455 self->extra->children + cur + 1,
1456 num_moved * sizeof(PyObject *));
1457 }
1458
1459 /* Leftover "tail" after the last removed child */
1460 cur = start + (size_t)slicelen * step;
1461 if (cur < (size_t)self->extra->length) {
1462 memmove(
1463 self->extra->children + cur - slicelen,
1464 self->extra->children + cur,
1465 (self->extra->length - cur) * sizeof(PyObject *));
1466 }
1467
1468 self->extra->length -= slicelen;
1469
1470 /* Discard the recycle list with all the deleted sub-elements */
1471 Py_XDECREF(recycle);
1472 return 0;
1473 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001474 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001475 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001476 seq = PySequence_Fast(value, "");
1477 if (!seq) {
1478 PyErr_Format(
1479 PyExc_TypeError,
1480 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1481 );
1482 return -1;
1483 }
1484 newlen = PySequence_Size(seq);
1485 }
1486
1487 if (step != 1 && newlen != slicelen)
1488 {
1489 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001490 "attempt to assign sequence of size %zd "
1491 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492 newlen, slicelen
1493 );
1494 return -1;
1495 }
1496
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 /* Resize before creating the recycle bin, to prevent refleaks. */
1498 if (newlen > slicelen) {
1499 if (element_resize(self, newlen - slicelen) < 0) {
1500 if (seq) {
1501 Py_DECREF(seq);
1502 }
1503 return -1;
1504 }
1505 }
1506
1507 if (slicelen > 0) {
1508 /* to avoid recursive calls to this method (via decref), move
1509 old items to the recycle bin here, and get rid of them when
1510 we're done modifying the element */
1511 recycle = PyList_New(slicelen);
1512 if (!recycle) {
1513 if (seq) {
1514 Py_DECREF(seq);
1515 }
1516 return -1;
1517 }
1518 for (cur = start, i = 0; i < slicelen;
1519 cur += step, i++)
1520 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1521 }
1522
1523 if (newlen < slicelen) {
1524 /* delete slice */
1525 for (i = stop; i < self->extra->length; i++)
1526 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1527 } else if (newlen > slicelen) {
1528 /* insert slice */
1529 for (i = self->extra->length-1; i >= stop; i--)
1530 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1531 }
1532
1533 /* replace the slice */
1534 for (cur = start, i = 0; i < newlen;
1535 cur += step, i++) {
1536 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1537 Py_INCREF(element);
1538 self->extra->children[cur] = element;
1539 }
1540
1541 self->extra->length += newlen - slicelen;
1542
1543 if (seq) {
1544 Py_DECREF(seq);
1545 }
1546
1547 /* discard the recycle bin, and everything in it */
1548 Py_XDECREF(recycle);
1549
1550 return 0;
1551 }
1552 else {
1553 PyErr_SetString(PyExc_TypeError,
1554 "element indices must be integers");
1555 return -1;
1556 }
1557}
1558
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559static PyMethodDef element_methods[] = {
1560
Eli Bendersky0192ba32012-03-30 16:38:33 +03001561 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001562
Eli Benderskya8736902013-01-05 06:26:39 -08001563 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 {"set", (PyCFunction) element_set, METH_VARARGS},
1565
Eli Bendersky737b1732012-05-29 06:02:56 +03001566 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1567 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1568 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569
1570 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001571 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1573 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1574
Eli Benderskya8736902013-01-05 06:26:39 -08001575 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001576 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001577 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001578
Eli Benderskya8736902013-01-05 06:26:39 -08001579 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1581
1582 {"items", (PyCFunction) element_items, METH_VARARGS},
1583 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1584
1585 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1586
1587 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1588 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001589 {"__sizeof__", element_sizeof, METH_NOARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 {NULL, NULL}
1592};
1593
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001595element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596{
1597 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001598 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001600 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001601 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001602
Alexander Belopolskye239d232010-12-08 23:31:48 +00001603 if (name == NULL)
1604 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001606 /* handle common attributes first */
1607 if (strcmp(name, "tag") == 0) {
1608 res = self->tag;
1609 Py_INCREF(res);
1610 return res;
1611 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001613 Py_INCREF(res);
1614 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 }
1616
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001617 /* methods */
1618 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1619 if (res)
1620 return res;
1621
1622 /* less common attributes */
1623 if (strcmp(name, "tail") == 0) {
1624 PyErr_Clear();
1625 res = element_get_tail(self);
1626 } else if (strcmp(name, "attrib") == 0) {
1627 PyErr_Clear();
1628 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001629 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001630 res = element_get_attrib(self);
1631 }
1632
1633 if (!res)
1634 return NULL;
1635
1636 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 return res;
1638}
1639
Eli Benderskyb20df952012-05-20 06:33:29 +03001640static PyObject*
1641element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642{
Eli Benderskyb20df952012-05-20 06:33:29 +03001643 char *name = "";
1644 if (PyUnicode_Check(nameobj))
1645 name = _PyUnicode_AsString(nameobj);
1646
1647 if (name == NULL)
1648 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 if (strcmp(name, "tag") == 0) {
1651 Py_DECREF(self->tag);
1652 self->tag = value;
1653 Py_INCREF(self->tag);
1654 } else if (strcmp(name, "text") == 0) {
1655 Py_DECREF(JOIN_OBJ(self->text));
1656 self->text = value;
1657 Py_INCREF(self->text);
1658 } else if (strcmp(name, "tail") == 0) {
1659 Py_DECREF(JOIN_OBJ(self->tail));
1660 self->tail = value;
1661 Py_INCREF(self->tail);
1662 } else if (strcmp(name, "attrib") == 0) {
1663 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001664 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665 Py_DECREF(self->extra->attrib);
1666 self->extra->attrib = value;
1667 Py_INCREF(self->extra->attrib);
1668 } else {
1669 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 }
1672
Eli Benderskyb20df952012-05-20 06:33:29 +03001673 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674}
1675
1676static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001677 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 0, /* sq_concat */
1679 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001680 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001681 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001682 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 0,
1684};
1685
1686static PyMappingMethods element_as_mapping = {
1687 (lenfunc) element_length,
1688 (binaryfunc) element_subscr,
1689 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690};
1691
Neal Norwitz227b5332006-03-22 09:28:35 +00001692static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001693 PyVarObject_HEAD_INIT(NULL, 0)
1694 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001696 (destructor)element_dealloc, /* tp_dealloc */
1697 0, /* tp_print */
1698 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001699 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001700 0, /* tp_reserved */
1701 (reprfunc)element_repr, /* tp_repr */
1702 0, /* tp_as_number */
1703 &element_as_sequence, /* tp_as_sequence */
1704 &element_as_mapping, /* tp_as_mapping */
1705 0, /* tp_hash */
1706 0, /* tp_call */
1707 0, /* tp_str */
1708 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001709 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001710 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001711 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1712 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001713 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001714 (traverseproc)element_gc_traverse, /* tp_traverse */
1715 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001716 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001717 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001718 0, /* tp_iter */
1719 0, /* tp_iternext */
1720 element_methods, /* tp_methods */
1721 0, /* tp_members */
1722 0, /* tp_getset */
1723 0, /* tp_base */
1724 0, /* tp_dict */
1725 0, /* tp_descr_get */
1726 0, /* tp_descr_set */
1727 0, /* tp_dictoffset */
1728 (initproc)element_init, /* tp_init */
1729 PyType_GenericAlloc, /* tp_alloc */
1730 element_new, /* tp_new */
1731 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732};
1733
Eli Bendersky64d11e62012-06-15 07:42:50 +03001734/******************************* Element iterator ****************************/
1735
1736/* ElementIterObject represents the iteration state over an XML element in
1737 * pre-order traversal. To keep track of which sub-element should be returned
1738 * next, a stack of parents is maintained. This is a standard stack-based
1739 * iterative pre-order traversal of a tree.
1740 * The stack is managed using a single-linked list starting at parent_stack.
1741 * Each stack node contains the saved parent to which we should return after
1742 * the current one is exhausted, and the next child to examine in that parent.
1743 */
1744typedef struct ParentLocator_t {
1745 ElementObject *parent;
1746 Py_ssize_t child_index;
1747 struct ParentLocator_t *next;
1748} ParentLocator;
1749
1750typedef struct {
1751 PyObject_HEAD
1752 ParentLocator *parent_stack;
1753 ElementObject *root_element;
1754 PyObject *sought_tag;
1755 int root_done;
1756 int gettext;
1757} ElementIterObject;
1758
1759
1760static void
1761elementiter_dealloc(ElementIterObject *it)
1762{
1763 ParentLocator *p = it->parent_stack;
1764 while (p) {
1765 ParentLocator *temp = p;
1766 Py_XDECREF(p->parent);
1767 p = p->next;
1768 PyObject_Free(temp);
1769 }
1770
1771 Py_XDECREF(it->sought_tag);
1772 Py_XDECREF(it->root_element);
1773
1774 PyObject_GC_UnTrack(it);
1775 PyObject_GC_Del(it);
1776}
1777
1778static int
1779elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1780{
1781 ParentLocator *p = it->parent_stack;
1782 while (p) {
1783 Py_VISIT(p->parent);
1784 p = p->next;
1785 }
1786
1787 Py_VISIT(it->root_element);
1788 Py_VISIT(it->sought_tag);
1789 return 0;
1790}
1791
1792/* Helper function for elementiter_next. Add a new parent to the parent stack.
1793 */
1794static ParentLocator *
1795parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1796{
1797 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1798 if (new_node) {
1799 new_node->parent = parent;
1800 Py_INCREF(parent);
1801 new_node->child_index = 0;
1802 new_node->next = stack;
1803 }
1804 return new_node;
1805}
1806
1807static PyObject *
1808elementiter_next(ElementIterObject *it)
1809{
1810 /* Sub-element iterator.
1811 *
1812 * A short note on gettext: this function serves both the iter() and
1813 * itertext() methods to avoid code duplication. However, there are a few
1814 * small differences in the way these iterations work. Namely:
1815 * - itertext() only yields text from nodes that have it, and continues
1816 * iterating when a node doesn't have text (so it doesn't return any
1817 * node like iter())
1818 * - itertext() also has to handle tail, after finishing with all the
1819 * children of a node.
1820 */
Eli Bendersky113da642012-06-15 07:52:49 +03001821 ElementObject *cur_parent;
1822 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001823
1824 while (1) {
1825 /* Handle the case reached in the beginning and end of iteration, where
1826 * the parent stack is empty. The root_done flag gives us indication
1827 * whether we've just started iterating (so root_done is 0), in which
1828 * case the root is returned. If root_done is 1 and we're here, the
1829 * iterator is exhausted.
1830 */
1831 if (!it->parent_stack->parent) {
1832 if (it->root_done) {
1833 PyErr_SetNone(PyExc_StopIteration);
1834 return NULL;
1835 } else {
1836 it->parent_stack = parent_stack_push_new(it->parent_stack,
1837 it->root_element);
1838 if (!it->parent_stack) {
1839 PyErr_NoMemory();
1840 return NULL;
1841 }
1842
1843 it->root_done = 1;
1844 if (it->sought_tag == Py_None ||
1845 PyObject_RichCompareBool(it->root_element->tag,
1846 it->sought_tag, Py_EQ) == 1) {
1847 if (it->gettext) {
1848 PyObject *text = JOIN_OBJ(it->root_element->text);
1849 if (PyObject_IsTrue(text)) {
1850 Py_INCREF(text);
1851 return text;
1852 }
1853 } else {
1854 Py_INCREF(it->root_element);
1855 return (PyObject *)it->root_element;
1856 }
1857 }
1858 }
1859 }
1860
1861 /* See if there are children left to traverse in the current parent. If
1862 * yes, visit the next child. If not, pop the stack and try again.
1863 */
Eli Bendersky113da642012-06-15 07:52:49 +03001864 cur_parent = it->parent_stack->parent;
1865 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001866 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1867 ElementObject *child = (ElementObject *)
1868 cur_parent->extra->children[child_index];
1869 it->parent_stack->child_index++;
1870 it->parent_stack = parent_stack_push_new(it->parent_stack,
1871 child);
1872 if (!it->parent_stack) {
1873 PyErr_NoMemory();
1874 return NULL;
1875 }
1876
1877 if (it->gettext) {
1878 PyObject *text = JOIN_OBJ(child->text);
1879 if (PyObject_IsTrue(text)) {
1880 Py_INCREF(text);
1881 return text;
1882 }
1883 } else if (it->sought_tag == Py_None ||
1884 PyObject_RichCompareBool(child->tag,
1885 it->sought_tag, Py_EQ) == 1) {
1886 Py_INCREF(child);
1887 return (PyObject *)child;
1888 }
1889 else
1890 continue;
1891 }
1892 else {
1893 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1894 ParentLocator *next = it->parent_stack->next;
1895 Py_XDECREF(it->parent_stack->parent);
1896 PyObject_Free(it->parent_stack);
1897 it->parent_stack = next;
1898
1899 /* Note that extra condition on it->parent_stack->parent here;
1900 * this is because itertext() is supposed to only return *inner*
1901 * text, not text following the element it began iteration with.
1902 */
1903 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1904 Py_INCREF(tail);
1905 return tail;
1906 }
1907 }
1908 }
1909
1910 return NULL;
1911}
1912
1913
/* Type object for the element iterator.  It participates in cyclic GC
 * (traverse only, no tp_clear), returns itself from tp_iter and produces
 * items through elementiter_next.  tp_new is left empty, so instances are
 * only created from C (see create_elementiter). */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
1955
1956
1957static PyObject *
1958create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1959{
1960 ElementIterObject *it;
1961 PyObject *star = NULL;
1962
1963 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
1964 if (!it)
1965 return NULL;
1966 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
1967 PyObject_GC_Del(it);
1968 return NULL;
1969 }
1970
1971 it->parent_stack->parent = NULL;
1972 it->parent_stack->child_index = 0;
1973 it->parent_stack->next = NULL;
1974
1975 if (PyUnicode_Check(tag))
1976 star = PyUnicode_FromString("*");
1977 else if (PyBytes_Check(tag))
1978 star = PyBytes_FromString("*");
1979
1980 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
1981 tag = Py_None;
1982
1983 Py_XDECREF(star);
1984 it->sought_tag = tag;
1985 it->root_done = 0;
1986 it->gettext = gettext;
1987 it->root_element = self;
1988
1989 Py_INCREF(self);
1990 Py_INCREF(tag);
1991
1992 PyObject_GC_Track(it);
1993 return (PyObject *)it;
1994}
1995
1996
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001997/* ==================================================================== */
1998/* the tree builder type */
1999
/* State of a tree builder: the tree under construction plus the optional
 * event recording used while building it. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* callable invoked as element_factory(tag, attrib) to create nodes,
       or NULL to create elements with create_new_element() */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* forward declaration; the type object is defined after its methods */
static PyTypeObject TreeBuilder_Type;

/* true only for exact TreeBuilder instances (subclasses do not match) */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002026
2027/* -------------------------------------------------------------------- */
2028/* constructor and destructor */
2029
Eli Bendersky58d548d2012-05-29 15:45:16 +03002030static PyObject *
2031treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002032{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002033 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2034 if (t != NULL) {
2035 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036
Eli Bendersky58d548d2012-05-29 15:45:16 +03002037 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002038 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002039 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002040 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002041
Eli Bendersky58d548d2012-05-29 15:45:16 +03002042 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002043 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002044 t->stack = PyList_New(20);
2045 if (!t->stack) {
2046 Py_DECREF(t->this);
2047 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002048 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002049 return NULL;
2050 }
2051 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002052
Eli Bendersky58d548d2012-05-29 15:45:16 +03002053 t->events = NULL;
2054 t->start_event_obj = t->end_event_obj = NULL;
2055 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2056 }
2057 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002058}
2059
Eli Bendersky58d548d2012-05-29 15:45:16 +03002060static int
2061treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002062{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002063 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002064 PyObject *element_factory = NULL;
2065 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002066 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002067
2068 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2069 &element_factory)) {
2070 return -1;
2071 }
2072
2073 if (element_factory) {
2074 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002075 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002076 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002077 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002078 }
2079
Eli Bendersky58d548d2012-05-29 15:45:16 +03002080 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002081}
2082
Eli Bendersky48d358b2012-05-30 17:57:50 +03002083static int
2084treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2085{
2086 Py_VISIT(self->root);
2087 Py_VISIT(self->this);
2088 Py_VISIT(self->last);
2089 Py_VISIT(self->data);
2090 Py_VISIT(self->stack);
2091 Py_VISIT(self->element_factory);
2092 return 0;
2093}
2094
/* tp_clear hook, also used by treebuilder_dealloc: release every object
 * reference held by the builder.  Py_CLEAR sets each field to NULL before
 * dropping the reference, so calling this more than once is harmless.
 * Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002111
/* Destructor: stop GC tracking first, then release all held references and
 * finally free the object's memory through the type's tp_free slot. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2119
2120/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002121/* helpers for handling of arbitrary element-like objects */
2122
2123static int
2124treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2125 PyObject **dest, _Py_Identifier *name)
2126{
2127 if (Element_CheckExact(element)) {
2128 Py_DECREF(JOIN_OBJ(*dest));
2129 *dest = JOIN_SET(data, PyList_CheckExact(data));
2130 return 0;
2131 }
2132 else {
2133 PyObject *joined = list_join(data);
2134 int r;
2135 if (joined == NULL)
2136 return -1;
2137 r = _PyObject_SetAttrId(element, name, joined);
2138 Py_DECREF(joined);
2139 return r;
2140 }
2141}
2142
2143/* These two functions steal a reference to data */
2144static int
2145treebuilder_set_element_text(PyObject *element, PyObject *data)
2146{
2147 _Py_IDENTIFIER(text);
2148 return treebuilder_set_element_text_or_tail(
2149 element, data, &((ElementObject *) element)->text, &PyId_text);
2150}
2151
2152static int
2153treebuilder_set_element_tail(PyObject *element, PyObject *data)
2154{
2155 _Py_IDENTIFIER(tail);
2156 return treebuilder_set_element_text_or_tail(
2157 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2158}
2159
2160static int
2161treebuilder_add_subelement(PyObject *element, PyObject *child)
2162{
2163 _Py_IDENTIFIER(append);
2164 if (Element_CheckExact(element)) {
2165 ElementObject *elem = (ElementObject *) element;
2166 return element_add_subelement(elem, child);
2167 }
2168 else {
2169 PyObject *res;
2170 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2171 if (res == NULL)
2172 return -1;
2173 Py_DECREF(res);
2174 return 0;
2175 }
2176}
2177
2178/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002179/* handlers */
2180
2181LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002182treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2183 PyObject* attrib)
2184{
2185 PyObject* node;
2186 PyObject* this;
2187
2188 if (self->data) {
2189 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002190 if (treebuilder_set_element_text(self->last, self->data))
2191 return NULL;
2192 }
2193 else {
2194 if (treebuilder_set_element_tail(self->last, self->data))
2195 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196 }
2197 self->data = NULL;
2198 }
2199
Eli Bendersky48d358b2012-05-30 17:57:50 +03002200 if (self->element_factory) {
2201 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2202 } else {
2203 node = create_new_element(tag, attrib);
2204 }
2205 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002207 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002208
Antoine Pitrouee329312012-10-04 19:53:29 +02002209 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002210
2211 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002212 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002213 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002214 } else {
2215 if (self->root) {
2216 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002217 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218 "multiple elements on top level"
2219 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002220 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221 }
2222 Py_INCREF(node);
2223 self->root = node;
2224 }
2225
2226 if (self->index < PyList_GET_SIZE(self->stack)) {
2227 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002228 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229 Py_INCREF(this);
2230 } else {
2231 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002232 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002233 }
2234 self->index++;
2235
2236 Py_DECREF(this);
2237 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002238 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239
2240 Py_DECREF(self->last);
2241 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002242 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243
2244 if (self->start_event_obj) {
2245 PyObject* res;
2246 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002247 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002249 PyList_Append(self->events, res);
2250 Py_DECREF(res);
2251 } else
2252 PyErr_Clear(); /* FIXME: propagate error */
2253 }
2254
2255 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002256
2257 error:
2258 Py_DECREF(node);
2259 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002260}
2261
2262LOCAL(PyObject*)
2263treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2264{
2265 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002266 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002267 /* ignore calls to data before the first call to start */
2268 Py_RETURN_NONE;
2269 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270 /* store the first item as is */
2271 Py_INCREF(data); self->data = data;
2272 } else {
2273 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002274 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2275 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002276 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 /* expat often generates single character data sections; handle
2278 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002279 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2280 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002282 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283 } else if (PyList_CheckExact(self->data)) {
2284 if (PyList_Append(self->data, data) < 0)
2285 return NULL;
2286 } else {
2287 PyObject* list = PyList_New(2);
2288 if (!list)
2289 return NULL;
2290 PyList_SET_ITEM(list, 0, self->data);
2291 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2292 self->data = list;
2293 }
2294 }
2295
2296 Py_RETURN_NONE;
2297}
2298
2299LOCAL(PyObject*)
2300treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2301{
2302 PyObject* item;
2303
2304 if (self->data) {
2305 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002306 if (treebuilder_set_element_text(self->last, self->data))
2307 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002309 if (treebuilder_set_element_tail(self->last, self->data))
2310 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311 }
2312 self->data = NULL;
2313 }
2314
2315 if (self->index == 0) {
2316 PyErr_SetString(
2317 PyExc_IndexError,
2318 "pop from empty stack"
2319 );
2320 return NULL;
2321 }
2322
2323 self->index--;
2324
2325 item = PyList_GET_ITEM(self->stack, self->index);
2326 Py_INCREF(item);
2327
2328 Py_DECREF(self->last);
2329
Antoine Pitrouee329312012-10-04 19:53:29 +02002330 self->last = self->this;
2331 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332
2333 if (self->end_event_obj) {
2334 PyObject* res;
2335 PyObject* action = self->end_event_obj;
2336 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002337 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339 PyList_Append(self->events, res);
2340 Py_DECREF(res);
2341 } else
2342 PyErr_Clear(); /* FIXME: propagate error */
2343 }
2344
2345 Py_INCREF(self->last);
2346 return (PyObject*) self->last;
2347}
2348
2349LOCAL(void)
2350treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002351 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352{
2353 PyObject* res;
2354 PyObject* action;
2355 PyObject* parcel;
2356
2357 if (!self->events)
2358 return;
2359
2360 if (start) {
2361 if (!self->start_ns_event_obj)
2362 return;
2363 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002364 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365 if (!parcel)
2366 return;
2367 Py_INCREF(action);
2368 } else {
2369 if (!self->end_ns_event_obj)
2370 return;
2371 action = self->end_ns_event_obj;
2372 Py_INCREF(action);
2373 parcel = Py_None;
2374 Py_INCREF(parcel);
2375 }
2376
2377 res = PyTuple_New(2);
2378
2379 if (res) {
2380 PyTuple_SET_ITEM(res, 0, action);
2381 PyTuple_SET_ITEM(res, 1, parcel);
2382 PyList_Append(self->events, res);
2383 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002384 }
2385 else {
2386 Py_DECREF(action);
2387 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002389 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390}
2391
2392/* -------------------------------------------------------------------- */
2393/* methods (in alphabetical order) */
2394
2395static PyObject*
2396treebuilder_data(TreeBuilderObject* self, PyObject* args)
2397{
2398 PyObject* data;
2399 if (!PyArg_ParseTuple(args, "O:data", &data))
2400 return NULL;
2401
2402 return treebuilder_handle_data(self, data);
2403}
2404
2405static PyObject*
2406treebuilder_end(TreeBuilderObject* self, PyObject* args)
2407{
2408 PyObject* tag;
2409 if (!PyArg_ParseTuple(args, "O:end", &tag))
2410 return NULL;
2411
2412 return treebuilder_handle_end(self, tag);
2413}
2414
2415LOCAL(PyObject*)
2416treebuilder_done(TreeBuilderObject* self)
2417{
2418 PyObject* res;
2419
2420 /* FIXME: check stack size? */
2421
2422 if (self->root)
2423 res = self->root;
2424 else
2425 res = Py_None;
2426
2427 Py_INCREF(res);
2428 return res;
2429}
2430
2431static PyObject*
2432treebuilder_close(TreeBuilderObject* self, PyObject* args)
2433{
2434 if (!PyArg_ParseTuple(args, ":close"))
2435 return NULL;
2436
2437 return treebuilder_done(self);
2438}
2439
2440static PyObject*
2441treebuilder_start(TreeBuilderObject* self, PyObject* args)
2442{
2443 PyObject* tag;
2444 PyObject* attrib = Py_None;
2445 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2446 return NULL;
2447
2448 return treebuilder_handle_start(self, tag, attrib);
2449}
2450
/* Method table of the TreeBuilder type; every method takes positional
 * arguments only (METH_VARARGS).  The {NULL, NULL} entry ends the table. */
static PyMethodDef treebuilder_methods[] = {
    {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
    {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
    {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
    {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
    {NULL, NULL}
};
2458
/* Type object for TreeBuilder.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and participates in cyclic GC with both traverse
 * and clear hooks. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
2500
2501/* ==================================================================== */
2502/* the expat interface */
2503
2504#if defined(USE_EXPAT)
2505
2506#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002508static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510
/* Memory handling suite built from Python's object allocator functions
 * (malloc, realloc, free -- in that order). */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2513
/* State of an XMLParser instance. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;      /* underlying expat parser */

    PyObject *target;       /* object receiving parse events; exact
                               TreeBuilder targets are driven directly in C */
    PyObject *entity;       /* mapping consulted to resolve "&name;"
                               references seen by the default handler */

    PyObject *names;        /* dict cache: raw name (bytes) -> decoded
                               universal name (see makeuniversal) */

    /* NOTE(review): the handle_* members look like cached callables taken
       from the target (handle_data is called with one argument when the
       target is not an exact TreeBuilder) -- confirm where they are set. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;

/* forward declaration of the parser's type object */
static PyTypeObject XMLParser_Type;

/* true only for exact XMLParser instances (subclasses do not match) */
#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2539
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540/* helpers */
2541
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542LOCAL(PyObject*)
2543makeuniversal(XMLParserObject* self, const char* string)
2544{
2545 /* convert a UTF-8 tag/attribute name from the expat parser
2546 to a universal name string */
2547
Antoine Pitrouc1948842012-10-01 23:40:37 +02002548 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 PyObject* key;
2550 PyObject* value;
2551
2552 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002553 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 if (!key)
2555 return NULL;
2556
2557 value = PyDict_GetItem(self->names, key);
2558
2559 if (value) {
2560 Py_INCREF(value);
2561 } else {
2562 /* new name. convert to universal name, and decode as
2563 necessary */
2564
2565 PyObject* tag;
2566 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002567 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568
2569 /* look for namespace separator */
2570 for (i = 0; i < size; i++)
2571 if (string[i] == '}')
2572 break;
2573 if (i != size) {
2574 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002575 tag = PyBytes_FromStringAndSize(NULL, size+1);
2576 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 p[0] = '{';
2578 memcpy(p+1, string, size);
2579 size++;
2580 } else {
2581 /* plain name; use key as tag */
2582 Py_INCREF(key);
2583 tag = key;
2584 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002587 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002588 value = PyUnicode_DecodeUTF8(p, size, "strict");
2589 Py_DECREF(tag);
2590 if (!value) {
2591 Py_DECREF(key);
2592 return NULL;
2593 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594
2595 /* add to names dictionary */
2596 if (PyDict_SetItem(self->names, key, value) < 0) {
2597 Py_DECREF(key);
2598 Py_DECREF(value);
2599 return NULL;
2600 }
2601 }
2602
2603 Py_DECREF(key);
2604 return value;
2605}
2606
Eli Bendersky5b77d812012-03-16 08:20:05 +02002607/* Set the ParseError exception with the given parameters.
2608 * If message is not NULL, it's used as the error string. Otherwise, the
2609 * message string is the default for the given error_code.
2610*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002611static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002612expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002613{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002614 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002615
Victor Stinner499dfcf2011-03-21 13:26:24 +01002616 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002617 message ? message : EXPAT(ErrorString)(error_code),
2618 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002619 if (errmsg == NULL)
2620 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002621
Victor Stinner499dfcf2011-03-21 13:26:24 +01002622 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2623 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002624 if (!error)
2625 return;
2626
Eli Bendersky5b77d812012-03-16 08:20:05 +02002627 /* Add code and position attributes */
2628 code = PyLong_FromLong((long)error_code);
2629 if (!code) {
2630 Py_DECREF(error);
2631 return;
2632 }
2633 if (PyObject_SetAttrString(error, "code", code) == -1) {
2634 Py_DECREF(error);
2635 Py_DECREF(code);
2636 return;
2637 }
2638 Py_DECREF(code);
2639
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002640 position = Py_BuildValue("(ii)", line, column);
2641 if (!position) {
2642 Py_DECREF(error);
2643 return;
2644 }
2645 if (PyObject_SetAttrString(error, "position", position) == -1) {
2646 Py_DECREF(error);
2647 Py_DECREF(position);
2648 return;
2649 }
2650 Py_DECREF(position);
2651
2652 PyErr_SetObject(elementtree_parseerror_obj, error);
2653 Py_DECREF(error);
2654}
2655
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656/* -------------------------------------------------------------------- */
2657/* handlers */
2658
2659static void
2660expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2661 int data_len)
2662{
2663 PyObject* key;
2664 PyObject* value;
2665 PyObject* res;
2666
2667 if (data_len < 2 || data_in[0] != '&')
2668 return;
2669
Neal Norwitz0269b912007-08-08 06:56:02 +00002670 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671 if (!key)
2672 return;
2673
2674 value = PyDict_GetItem(self->entity, key);
2675
2676 if (value) {
2677 if (TreeBuilder_CheckExact(self->target))
2678 res = treebuilder_handle_data(
2679 (TreeBuilderObject*) self->target, value
2680 );
2681 else if (self->handle_data)
2682 res = PyObject_CallFunction(self->handle_data, "O", value);
2683 else
2684 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002686 } else if (!PyErr_Occurred()) {
2687 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002688 char message[128] = "undefined entity ";
2689 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002690 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002691 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002693 EXPAT(GetErrorColumnNumber)(self->parser),
2694 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 );
2696 }
2697
2698 Py_DECREF(key);
2699}
2700
2701static void
2702expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2703 const XML_Char **attrib_in)
2704{
2705 PyObject* res;
2706 PyObject* tag;
2707 PyObject* attrib;
2708 int ok;
2709
2710 /* tag name */
2711 tag = makeuniversal(self, tag_in);
2712 if (!tag)
2713 return; /* parser will look for errors */
2714
2715 /* attributes */
2716 if (attrib_in[0]) {
2717 attrib = PyDict_New();
2718 if (!attrib)
2719 return;
2720 while (attrib_in[0] && attrib_in[1]) {
2721 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002722 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 if (!key || !value) {
2724 Py_XDECREF(value);
2725 Py_XDECREF(key);
2726 Py_DECREF(attrib);
2727 return;
2728 }
2729 ok = PyDict_SetItem(attrib, key, value);
2730 Py_DECREF(value);
2731 Py_DECREF(key);
2732 if (ok < 0) {
2733 Py_DECREF(attrib);
2734 return;
2735 }
2736 attrib_in += 2;
2737 }
2738 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002739 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002740 attrib = PyDict_New();
2741 if (!attrib)
2742 return;
2743 }
2744
2745 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 /* shortcut */
2747 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2748 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002749 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002750 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002752 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 res = NULL;
2754
2755 Py_DECREF(tag);
2756 Py_DECREF(attrib);
2757
2758 Py_XDECREF(res);
2759}
2760
2761static void
2762expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2763 int data_len)
2764{
2765 PyObject* data;
2766 PyObject* res;
2767
Neal Norwitz0269b912007-08-08 06:56:02 +00002768 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002769 if (!data)
2770 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771
2772 if (TreeBuilder_CheckExact(self->target))
2773 /* shortcut */
2774 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2775 else if (self->handle_data)
2776 res = PyObject_CallFunction(self->handle_data, "O", data);
2777 else
2778 res = NULL;
2779
2780 Py_DECREF(data);
2781
2782 Py_XDECREF(res);
2783}
2784
2785static void
2786expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2787{
2788 PyObject* tag;
2789 PyObject* res = NULL;
2790
2791 if (TreeBuilder_CheckExact(self->target))
2792 /* shortcut */
2793 /* the standard tree builder doesn't look at the end tag */
2794 res = treebuilder_handle_end(
2795 (TreeBuilderObject*) self->target, Py_None
2796 );
2797 else if (self->handle_end) {
2798 tag = makeuniversal(self, tag_in);
2799 if (tag) {
2800 res = PyObject_CallFunction(self->handle_end, "O", tag);
2801 Py_DECREF(tag);
2802 }
2803 }
2804
2805 Py_XDECREF(res);
2806}
2807
2808static void
2809expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2810 const XML_Char *uri)
2811{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002812 PyObject* sprefix = NULL;
2813 PyObject* suri = NULL;
2814
2815 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2816 if (!suri)
2817 return;
2818
2819 if (prefix)
2820 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2821 else
2822 sprefix = PyUnicode_FromString("");
2823 if (!sprefix) {
2824 Py_DECREF(suri);
2825 return;
2826 }
2827
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002831
2832 Py_DECREF(sprefix);
2833 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834}
2835
2836static void
2837expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2838{
2839 treebuilder_handle_namespace(
2840 (TreeBuilderObject*) self->target, 0, NULL, NULL
2841 );
2842}
2843
2844static void
2845expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2846{
2847 PyObject* comment;
2848 PyObject* res;
2849
2850 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002851 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852 if (comment) {
2853 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2854 Py_XDECREF(res);
2855 Py_DECREF(comment);
2856 }
2857 }
2858}
2859
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002860static void
2861expat_start_doctype_handler(XMLParserObject *self,
2862 const XML_Char *doctype_name,
2863 const XML_Char *sysid,
2864 const XML_Char *pubid,
2865 int has_internal_subset)
2866{
2867 PyObject *self_pyobj = (PyObject *)self;
2868 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2869 PyObject *parser_doctype = NULL;
2870 PyObject *res = NULL;
2871
2872 doctype_name_obj = makeuniversal(self, doctype_name);
2873 if (!doctype_name_obj)
2874 return;
2875
2876 if (sysid) {
2877 sysid_obj = makeuniversal(self, sysid);
2878 if (!sysid_obj) {
2879 Py_DECREF(doctype_name_obj);
2880 return;
2881 }
2882 } else {
2883 Py_INCREF(Py_None);
2884 sysid_obj = Py_None;
2885 }
2886
2887 if (pubid) {
2888 pubid_obj = makeuniversal(self, pubid);
2889 if (!pubid_obj) {
2890 Py_DECREF(doctype_name_obj);
2891 Py_DECREF(sysid_obj);
2892 return;
2893 }
2894 } else {
2895 Py_INCREF(Py_None);
2896 pubid_obj = Py_None;
2897 }
2898
2899 /* If the target has a handler for doctype, call it. */
2900 if (self->handle_doctype) {
2901 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2902 doctype_name_obj, pubid_obj, sysid_obj);
2903 Py_CLEAR(res);
2904 }
2905
2906 /* Now see if the parser itself has a doctype method. If yes and it's
2907 * a subclass, call it but warn about deprecation. If it's not a subclass
2908 * (i.e. vanilla XMLParser), do nothing.
2909 */
2910 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2911 if (parser_doctype) {
2912 if (!XMLParser_CheckExact(self_pyobj)) {
2913 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2914 "This method of XMLParser is deprecated. Define"
2915 " doctype() method on the TreeBuilder target.",
2916 1) < 0) {
2917 goto clear;
2918 }
2919 res = PyObject_CallFunction(parser_doctype, "OOO",
2920 doctype_name_obj, pubid_obj, sysid_obj);
2921 Py_CLEAR(res);
2922 }
2923 }
2924
2925clear:
2926 Py_XDECREF(parser_doctype);
2927 Py_DECREF(doctype_name_obj);
2928 Py_DECREF(pubid_obj);
2929 Py_DECREF(sysid_obj);
2930}
2931
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932static void
2933expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2934 const XML_Char* data_in)
2935{
2936 PyObject* target;
2937 PyObject* data;
2938 PyObject* res;
2939
2940 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002941 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2942 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 if (target && data) {
2944 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2945 Py_XDECREF(res);
2946 Py_DECREF(data);
2947 Py_DECREF(target);
2948 } else {
2949 Py_XDECREF(data);
2950 Py_XDECREF(target);
2951 }
2952 }
2953}
2954
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955static int
2956expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2957 XML_Encoding *info)
2958{
2959 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 unsigned char s[256];
2961 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002962 void *data;
2963 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964
2965 memset(info, 0, sizeof(XML_Encoding));
2966
2967 for (i = 0; i < 256; i++)
2968 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002969
Fredrik Lundhc3389992005-12-25 11:40:19 +00002970 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 if (!u)
2972 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002973 if (PyUnicode_READY(u))
2974 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002976 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 Py_DECREF(u);
2978 return XML_STATUS_ERROR;
2979 }
2980
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002981 kind = PyUnicode_KIND(u);
2982 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002984 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2985 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2986 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002988 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 }
2990
2991 Py_DECREF(u);
2992
2993 return XML_STATUS_OK;
2994}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995
2996/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997
Eli Bendersky52467b12012-06-01 07:13:08 +03002998static PyObject *
2999xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000{
Eli Bendersky52467b12012-06-01 07:13:08 +03003001 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3002 if (self) {
3003 self->parser = NULL;
3004 self->target = self->entity = self->names = NULL;
3005 self->handle_start = self->handle_data = self->handle_end = NULL;
3006 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003007 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003009 return (PyObject *)self;
3010}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011
Eli Bendersky52467b12012-06-01 07:13:08 +03003012static int
3013xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3014{
3015 XMLParserObject *self_xp = (XMLParserObject *)self;
3016 PyObject *target = NULL, *html = NULL;
3017 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003018 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019
Eli Bendersky52467b12012-06-01 07:13:08 +03003020 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3021 &html, &target, &encoding)) {
3022 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003024
Eli Bendersky52467b12012-06-01 07:13:08 +03003025 self_xp->entity = PyDict_New();
3026 if (!self_xp->entity)
3027 return -1;
3028
3029 self_xp->names = PyDict_New();
3030 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003031 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003032 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 }
3034
Eli Bendersky52467b12012-06-01 07:13:08 +03003035 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3036 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003037 Py_CLEAR(self_xp->entity);
3038 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003040 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 }
3042
Eli Bendersky52467b12012-06-01 07:13:08 +03003043 if (target) {
3044 Py_INCREF(target);
3045 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003046 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003048 Py_CLEAR(self_xp->entity);
3049 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003050 EXPAT(ParserFree)(self_xp->parser);
3051 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003053 }
3054 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055
Eli Bendersky52467b12012-06-01 07:13:08 +03003056 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3057 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3058 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3059 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3060 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3061 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003062 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063
3064 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003067 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003069 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 (XML_StartElementHandler) expat_start_handler,
3071 (XML_EndElementHandler) expat_end_handler
3072 );
3073 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003074 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 (XML_DefaultHandler) expat_default_handler
3076 );
3077 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003078 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079 (XML_CharacterDataHandler) expat_data_handler
3080 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003081 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003083 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084 (XML_CommentHandler) expat_comment_handler
3085 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003086 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003088 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 (XML_ProcessingInstructionHandler) expat_pi_handler
3090 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003091 EXPAT(SetStartDoctypeDeclHandler)(
3092 self_xp->parser,
3093 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3094 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003096 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3098 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099
Eli Bendersky52467b12012-06-01 07:13:08 +03003100 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101}
3102
Eli Bendersky52467b12012-06-01 07:13:08 +03003103static int
3104xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3105{
3106 Py_VISIT(self->handle_close);
3107 Py_VISIT(self->handle_pi);
3108 Py_VISIT(self->handle_comment);
3109 Py_VISIT(self->handle_end);
3110 Py_VISIT(self->handle_data);
3111 Py_VISIT(self->handle_start);
3112
3113 Py_VISIT(self->target);
3114 Py_VISIT(self->entity);
3115 Py_VISIT(self->names);
3116
3117 return 0;
3118}
3119
3120static int
3121xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122{
3123 EXPAT(ParserFree)(self->parser);
3124
Antoine Pitrouc1948842012-10-01 23:40:37 +02003125 Py_CLEAR(self->handle_close);
3126 Py_CLEAR(self->handle_pi);
3127 Py_CLEAR(self->handle_comment);
3128 Py_CLEAR(self->handle_end);
3129 Py_CLEAR(self->handle_data);
3130 Py_CLEAR(self->handle_start);
3131 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132
Antoine Pitrouc1948842012-10-01 23:40:37 +02003133 Py_CLEAR(self->target);
3134 Py_CLEAR(self->entity);
3135 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136
Eli Bendersky52467b12012-06-01 07:13:08 +03003137 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138}
3139
Eli Bendersky52467b12012-06-01 07:13:08 +03003140static void
3141xmlparser_dealloc(XMLParserObject* self)
3142{
3143 PyObject_GC_UnTrack(self);
3144 xmlparser_gc_clear(self);
3145 Py_TYPE(self)->tp_free((PyObject *)self);
3146}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003147
3148LOCAL(PyObject*)
3149expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3150{
3151 int ok;
3152
3153 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3154
3155 if (PyErr_Occurred())
3156 return NULL;
3157
3158 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003159 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003160 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003162 EXPAT(GetErrorColumnNumber)(self->parser),
3163 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164 );
3165 return NULL;
3166 }
3167
3168 Py_RETURN_NONE;
3169}
3170
3171static PyObject*
3172xmlparser_close(XMLParserObject* self, PyObject* args)
3173{
3174 /* end feeding data to parser */
3175
3176 PyObject* res;
3177 if (!PyArg_ParseTuple(args, ":close"))
3178 return NULL;
3179
3180 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003181 if (!res)
3182 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003184 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 Py_DECREF(res);
3186 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003187 } if (self->handle_close) {
3188 Py_DECREF(res);
3189 return PyObject_CallFunction(self->handle_close, "");
3190 } else
3191 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192}
3193
3194static PyObject*
3195xmlparser_feed(XMLParserObject* self, PyObject* args)
3196{
3197 /* feed data to parser */
3198
3199 char* data;
3200 int data_len;
3201 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3202 return NULL;
3203
3204 return expat_parse(self, data, data_len, 0);
3205}
3206
3207static PyObject*
3208xmlparser_parse(XMLParserObject* self, PyObject* args)
3209{
3210 /* (internal) parse until end of input stream */
3211
3212 PyObject* reader;
3213 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003214 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 PyObject* res;
3216
3217 PyObject* fileobj;
3218 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3219 return NULL;
3220
3221 reader = PyObject_GetAttrString(fileobj, "read");
3222 if (!reader)
3223 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003224
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 /* read from open file object */
3226 for (;;) {
3227
3228 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3229
3230 if (!buffer) {
3231 /* read failed (e.g. due to KeyboardInterrupt) */
3232 Py_DECREF(reader);
3233 return NULL;
3234 }
3235
Eli Benderskyf996e772012-03-16 05:53:30 +02003236 if (PyUnicode_CheckExact(buffer)) {
3237 /* A unicode object is encoded into bytes using UTF-8 */
3238 if (PyUnicode_GET_SIZE(buffer) == 0) {
3239 Py_DECREF(buffer);
3240 break;
3241 }
3242 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003243 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003244 if (!temp) {
3245 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003246 Py_DECREF(reader);
3247 return NULL;
3248 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003249 buffer = temp;
3250 }
3251 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 Py_DECREF(buffer);
3253 break;
3254 }
3255
3256 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003257 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 );
3259
3260 Py_DECREF(buffer);
3261
3262 if (!res) {
3263 Py_DECREF(reader);
3264 return NULL;
3265 }
3266 Py_DECREF(res);
3267
3268 }
3269
3270 Py_DECREF(reader);
3271
3272 res = expat_parse(self, "", 0, 1);
3273
3274 if (res && TreeBuilder_CheckExact(self->target)) {
3275 Py_DECREF(res);
3276 return treebuilder_done((TreeBuilderObject*) self->target);
3277 }
3278
3279 return res;
3280}
3281
3282static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003283xmlparser_doctype(XMLParserObject *self, PyObject *args)
3284{
3285 Py_RETURN_NONE;
3286}
3287
3288static PyObject*
3289xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290{
3291 /* activate element event reporting */
3292
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003293 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 TreeBuilderObject* target;
3295
3296 PyObject* events; /* event collector */
3297 PyObject* event_set = Py_None;
3298 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3299 &event_set))
3300 return NULL;
3301
3302 if (!TreeBuilder_CheckExact(self->target)) {
3303 PyErr_SetString(
3304 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003305 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 "targets"
3307 );
3308 return NULL;
3309 }
3310
3311 target = (TreeBuilderObject*) self->target;
3312
3313 Py_INCREF(events);
3314 Py_XDECREF(target->events);
3315 target->events = events;
3316
3317 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003318 Py_CLEAR(target->start_event_obj);
3319 Py_CLEAR(target->end_event_obj);
3320 Py_CLEAR(target->start_ns_event_obj);
3321 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322
3323 if (event_set == Py_None) {
3324 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003325 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 Py_RETURN_NONE;
3327 }
3328
3329 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3330 goto error;
3331
3332 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3333 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3334 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003335 if (PyUnicode_Check(item)) {
3336 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003337 if (event == NULL)
3338 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003339 } else if (PyBytes_Check(item))
3340 event = PyBytes_AS_STRING(item);
3341 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003343 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 if (strcmp(event, "start") == 0) {
3345 Py_INCREF(item);
3346 target->start_event_obj = item;
3347 } else if (strcmp(event, "end") == 0) {
3348 Py_INCREF(item);
3349 Py_XDECREF(target->end_event_obj);
3350 target->end_event_obj = item;
3351 } else if (strcmp(event, "start-ns") == 0) {
3352 Py_INCREF(item);
3353 Py_XDECREF(target->start_ns_event_obj);
3354 target->start_ns_event_obj = item;
3355 EXPAT(SetNamespaceDeclHandler)(
3356 self->parser,
3357 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3358 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3359 );
3360 } else if (strcmp(event, "end-ns") == 0) {
3361 Py_INCREF(item);
3362 Py_XDECREF(target->end_ns_event_obj);
3363 target->end_ns_event_obj = item;
3364 EXPAT(SetNamespaceDeclHandler)(
3365 self->parser,
3366 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3367 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3368 );
3369 } else {
3370 PyErr_Format(
3371 PyExc_ValueError,
3372 "unknown event '%s'", event
3373 );
3374 return NULL;
3375 }
3376 }
3377
3378 Py_RETURN_NONE;
3379
3380 error:
3381 PyErr_SetString(
3382 PyExc_TypeError,
3383 "invalid event tuple"
3384 );
3385 return NULL;
3386}
3387
3388static PyMethodDef xmlparser_methods[] = {
3389 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3390 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3391 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3392 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003393 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 {NULL, NULL}
3395};
3396
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003397static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003398xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003400 if (PyUnicode_Check(nameobj)) {
3401 PyObject* res;
3402 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3403 res = self->entity;
3404 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3405 res = self->target;
3406 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3407 return PyUnicode_FromFormat(
3408 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003410 }
3411 else
3412 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413
Alexander Belopolskye239d232010-12-08 23:31:48 +00003414 Py_INCREF(res);
3415 return res;
3416 }
3417 generic:
3418 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419}
3420
Neal Norwitz227b5332006-03-22 09:28:35 +00003421static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003422 PyVarObject_HEAD_INIT(NULL, 0)
3423 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003425 (destructor)xmlparser_dealloc, /* tp_dealloc */
3426 0, /* tp_print */
3427 0, /* tp_getattr */
3428 0, /* tp_setattr */
3429 0, /* tp_reserved */
3430 0, /* tp_repr */
3431 0, /* tp_as_number */
3432 0, /* tp_as_sequence */
3433 0, /* tp_as_mapping */
3434 0, /* tp_hash */
3435 0, /* tp_call */
3436 0, /* tp_str */
3437 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3438 0, /* tp_setattro */
3439 0, /* tp_as_buffer */
3440 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3441 /* tp_flags */
3442 0, /* tp_doc */
3443 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3444 (inquiry)xmlparser_gc_clear, /* tp_clear */
3445 0, /* tp_richcompare */
3446 0, /* tp_weaklistoffset */
3447 0, /* tp_iter */
3448 0, /* tp_iternext */
3449 xmlparser_methods, /* tp_methods */
3450 0, /* tp_members */
3451 0, /* tp_getset */
3452 0, /* tp_base */
3453 0, /* tp_dict */
3454 0, /* tp_descr_get */
3455 0, /* tp_descr_set */
3456 0, /* tp_dictoffset */
3457 (initproc)xmlparser_init, /* tp_init */
3458 PyType_GenericAlloc, /* tp_alloc */
3459 xmlparser_new, /* tp_new */
3460 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461};
3462
3463#endif
3464
3465/* ==================================================================== */
3466/* python module interface */
3467
3468static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003469 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470 {NULL, NULL}
3471};
3472
Martin v. Löwis1a214512008-06-11 05:26:20 +00003473
3474static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003475 PyModuleDef_HEAD_INIT,
3476 "_elementtree",
3477 NULL,
3478 -1,
3479 _functions,
3480 NULL,
3481 NULL,
3482 NULL,
3483 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003484};
3485
Neal Norwitzf6657e62006-12-28 04:47:50 +00003486PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003487PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003489 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003491 /* Initialize object types */
3492 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003493 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003494 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003495 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003496#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003497 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003498 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003499#endif
3500
Martin v. Löwis1a214512008-06-11 05:26:20 +00003501 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003502 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003503 return NULL;
3504
Eli Bendersky828efde2012-04-05 05:40:58 +03003505 if (!(temp = PyImport_ImportModule("copy")))
3506 return NULL;
3507 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3508 Py_XDECREF(temp);
3509
3510 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3511 return NULL;
3512
Eli Bendersky20d41742012-06-01 09:48:37 +03003513 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003514 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3515 if (expat_capi) {
3516 /* check that it's usable */
3517 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3518 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3519 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3520 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003521 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003522 PyErr_SetString(PyExc_ImportError,
3523 "pyexpat version is incompatible");
3524 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003525 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003526 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003527 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003528 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003529
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003530 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003531 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003532 );
3533 Py_INCREF(elementtree_parseerror_obj);
3534 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3535
Eli Bendersky092af1f2012-03-04 07:14:03 +02003536 Py_INCREF((PyObject *)&Element_Type);
3537 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3538
Eli Bendersky58d548d2012-05-29 15:45:16 +03003539 Py_INCREF((PyObject *)&TreeBuilder_Type);
3540 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3541
Eli Bendersky52467b12012-06-01 07:13:08 +03003542#if defined(USE_EXPAT)
3543 Py_INCREF((PyObject *)&XMLParser_Type);
3544 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3545#endif
3546
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003547 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548}