blob: bf4bc4a8a24cf83f263308285896800371c5112a [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* An element can hold this many children without extra memory
   allocations.  This is the size of the inline _children array in
   ElementObjectExtra and the initial value of its 'allocated' field. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
 * RELEASE keep a running byte count in 'memory' and print it together with
 * the given comment; in the default build both macros expand to nothing.
 */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
49/* compiler tweaks */
/* LOCAL(type) introduces a file-local function returning 'type'.  Under
 * MSVC it additionally asks for inlining and the __fastcall convention;
 * everywhere else it is plain 'static'.
 */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
/* JOIN_GET(p): the join flag, i.e. the lowest bit of the tagged pointer. */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET(p, flag): the pointer with its old flag stripped and 'flag'
   OR-ed into the lowest bit. */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ(p): the real PyObject*, with the lowest bit masked off. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
/* Forward declarations so the code below can refer to the type objects
   (e.g. &Element_Type in PyObject_GC_New and PyArg_ParseTuple) before
   their definitions -- presumably further down in this file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
typedef struct {
    /* NOTE(review): presumably the ParseError exception object; not used in
       the part of the file reviewed here -- confirm against module init. */
    PyObject *parseerror_obj;
    /* Callable invoked by deepcopy() as deepcopy_obj(object, memo). */
    PyObject *deepcopy_obj;
    /* NOTE(review): presumably the ElementPath helper module; not used in
       the part of the file reviewed here -- confirm against module init. */
    PyObject *elementpath_obj;
} elementtreestate;
89
/* Forward declaration; ET_STATE_GLOBAL needs the address of the module
 * definition to look the module up.
 */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.  Neither lookup result is checked for NULL here.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* True only when op's type is exactly Element_Type; subclasses do not match. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Eli Bendersky092af1f2012-03-04 07:14:03 +0200373static int
374element_init(PyObject *self, PyObject *args, PyObject *kwds)
375{
376 PyObject *tag;
377 PyObject *tmp;
378 PyObject *attrib = NULL;
379 ElementObject *self_elem;
380
381 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
382 return -1;
383
Eli Bendersky737b1732012-05-29 06:02:56 +0300384 if (attrib) {
385 /* attrib passed as positional arg */
386 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (!attrib)
388 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300389 if (kwds) {
390 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200391 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 return -1;
393 }
394 }
395 } else if (kwds) {
396 /* have keywords args */
397 attrib = get_attrib_from_keywords(kwds);
398 if (!attrib)
399 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 }
401
402 self_elem = (ElementObject *)self;
403
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 return -1;
408 }
409 }
410
Eli Bendersky48d358b2012-05-30 17:57:50 +0300411 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 /* Replace the objects already pointed to by tag, text and tail. */
415 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_DECREF(tmp);
419
420 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200422 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423 Py_DECREF(JOIN_OBJ(tmp));
424
425 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200427 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_DECREF(JOIN_OBJ(tmp));
429
430 return 0;
431}
432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200434element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200436 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 PyObject* *children;
438
439 /* make sure self->children can hold the given number of extra
440 elements. set an exception and return -1 if allocation failed */
441
Victor Stinner5f0af232013-07-11 23:01:36 +0200442 if (!self->extra) {
443 if (create_extra(self, NULL) < 0)
444 return -1;
445 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200447 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000448
449 if (size > self->extra->allocated) {
450 /* use Python 2.4's list growth strategy */
451 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000452 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100453 * which needs at least 4 bytes.
454 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000455 * be safe.
456 */
457 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200458 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
459 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000461 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100462 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * false alarm always assume at least one child to be safe.
464 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000465 children = PyObject_Realloc(self->extra->children,
466 size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 } else {
470 children = PyObject_Malloc(size * sizeof(PyObject*));
471 if (!children)
472 goto nomemory;
473 /* copy existing children from static area to malloc buffer */
474 memcpy(children, self->extra->children,
475 self->extra->length * sizeof(PyObject*));
476 }
477 self->extra->children = children;
478 self->extra->allocated = size;
479 }
480
481 return 0;
482
483 nomemory:
484 PyErr_NoMemory();
485 return -1;
486}
487
488LOCAL(int)
489element_add_subelement(ElementObject* self, PyObject* element)
490{
491 /* add a child element to a parent */
492
493 if (element_resize(self, 1) < 0)
494 return -1;
495
496 Py_INCREF(element);
497 self->extra->children[self->extra->length] = element;
498
499 self->extra->length++;
500
501 return 0;
502}
503
504LOCAL(PyObject*)
505element_get_attrib(ElementObject* self)
506{
507 /* return borrowed reference to attrib dictionary */
508 /* note: this function assumes that the extra section exists */
509
510 PyObject* res = self->extra->attrib;
511
512 if (res == Py_None) {
513 /* create missing dictionary */
514 res = PyDict_New();
515 if (!res)
516 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200517 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000518 self->extra->attrib = res;
519 }
520
521 return res;
522}
523
524LOCAL(PyObject*)
525element_get_text(ElementObject* self)
526{
527 /* return borrowed reference to text attribute */
528
529 PyObject* res = self->text;
530
531 if (JOIN_GET(res)) {
532 res = JOIN_OBJ(res);
533 if (PyList_CheckExact(res)) {
534 res = list_join(res);
535 if (!res)
536 return NULL;
537 self->text = res;
538 }
539 }
540
541 return res;
542}
543
544LOCAL(PyObject*)
545element_get_tail(ElementObject* self)
546{
547 /* return borrowed reference to text attribute */
548
549 PyObject* res = self->tail;
550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
554 res = list_join(res);
555 if (!res)
556 return NULL;
557 self->tail = res;
558 }
559 }
560
561 return res;
562}
563
564static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300565subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566{
567 PyObject* elem;
568
569 ElementObject* parent;
570 PyObject* tag;
571 PyObject* attrib = NULL;
572 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
573 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800574 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800576 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 if (attrib) {
579 /* attrib passed as positional arg */
580 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581 if (!attrib)
582 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300583 if (kwds) {
584 if (PyDict_Update(attrib, kwds) < 0) {
585 return NULL;
586 }
587 }
588 } else if (kwds) {
589 /* have keyword args */
590 attrib = get_attrib_from_keywords(kwds);
591 if (!attrib)
592 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000593 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300594 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595 Py_INCREF(Py_None);
596 attrib = Py_None;
597 }
598
Eli Bendersky092af1f2012-03-04 07:14:03 +0200599 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200601 if (elem == NULL)
602 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000604 if (element_add_subelement(parent, elem) < 0) {
605 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608
609 return elem;
610}
611
Eli Bendersky0192ba32012-03-30 16:38:33 +0300612static int
613element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
614{
615 Py_VISIT(self->tag);
616 Py_VISIT(JOIN_OBJ(self->text));
617 Py_VISIT(JOIN_OBJ(self->tail));
618
619 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200620 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621 Py_VISIT(self->extra->attrib);
622
623 for (i = 0; i < self->extra->length; ++i)
624 Py_VISIT(self->extra->children[i]);
625 }
626 return 0;
627}
628
629static int
630element_gc_clear(ElementObject *self)
631{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300632 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700633 _clear_joined_ptr(&self->text);
634 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635
636 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300637 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300638 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300639 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 return 0;
641}
642
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000643static void
644element_dealloc(ElementObject* self)
645{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647
648 if (self->weakreflist != NULL)
649 PyObject_ClearWeakRefs((PyObject *) self);
650
Eli Bendersky0192ba32012-03-30 16:38:33 +0300651 /* element_gc_clear clears all references and deallocates extra
652 */
653 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000654
655 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200656 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000657}
658
659/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660
661static PyObject*
662element_append(ElementObject* self, PyObject* args)
663{
664 PyObject* element;
665 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
666 return NULL;
667
668 if (element_add_subelement(self, element) < 0)
669 return NULL;
670
671 Py_RETURN_NONE;
672}
673
674static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300675element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676{
677 if (!PyArg_ParseTuple(args, ":clear"))
678 return NULL;
679
Eli Benderskyebf37a22012-04-03 22:02:37 +0300680 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
682 Py_INCREF(Py_None);
683 Py_DECREF(JOIN_OBJ(self->text));
684 self->text = Py_None;
685
686 Py_INCREF(Py_None);
687 Py_DECREF(JOIN_OBJ(self->tail));
688 self->tail = Py_None;
689
690 Py_RETURN_NONE;
691}
692
693static PyObject*
694element_copy(ElementObject* self, PyObject* args)
695{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 ElementObject* element;
698
699 if (!PyArg_ParseTuple(args, ":__copy__"))
700 return NULL;
701
Eli Bendersky092af1f2012-03-04 07:14:03 +0200702 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800703 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704 if (!element)
705 return NULL;
706
707 Py_DECREF(JOIN_OBJ(element->text));
708 element->text = self->text;
709 Py_INCREF(JOIN_OBJ(element->text));
710
711 Py_DECREF(JOIN_OBJ(element->tail));
712 element->tail = self->tail;
713 Py_INCREF(JOIN_OBJ(element->tail));
714
715 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000716 if (element_resize(element, self->extra->length) < 0) {
717 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000719 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720
721 for (i = 0; i < self->extra->length; i++) {
722 Py_INCREF(self->extra->children[i]);
723 element->extra->children[i] = self->extra->children[i];
724 }
725
726 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727 }
728
729 return (PyObject*) element;
730}
731
732static PyObject*
733element_deepcopy(ElementObject* self, PyObject* args)
734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737 PyObject* tag;
738 PyObject* attrib;
739 PyObject* text;
740 PyObject* tail;
741 PyObject* id;
742
743 PyObject* memo;
744 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
745 return NULL;
746
747 tag = deepcopy(self->tag, memo);
748 if (!tag)
749 return NULL;
750
751 if (self->extra) {
752 attrib = deepcopy(self->extra->attrib, memo);
753 if (!attrib) {
754 Py_DECREF(tag);
755 return NULL;
756 }
757 } else {
758 Py_INCREF(Py_None);
759 attrib = Py_None;
760 }
761
Eli Bendersky092af1f2012-03-04 07:14:03 +0200762 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763
764 Py_DECREF(tag);
765 Py_DECREF(attrib);
766
767 if (!element)
768 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100769
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000770 text = deepcopy(JOIN_OBJ(self->text), memo);
771 if (!text)
772 goto error;
773 Py_DECREF(element->text);
774 element->text = JOIN_SET(text, JOIN_GET(self->text));
775
776 tail = deepcopy(JOIN_OBJ(self->tail), memo);
777 if (!tail)
778 goto error;
779 Py_DECREF(element->tail);
780 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
781
782 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783 if (element_resize(element, self->extra->length) < 0)
784 goto error;
785
786 for (i = 0; i < self->extra->length; i++) {
787 PyObject* child = deepcopy(self->extra->children[i], memo);
788 if (!child) {
789 element->extra->length = i;
790 goto error;
791 }
792 element->extra->children[i] = child;
793 }
794
795 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 }
797
798 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200799 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000800 if (!id)
801 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802
803 i = PyDict_SetItem(memo, id, (PyObject*) element);
804
805 Py_DECREF(id);
806
807 if (i < 0)
808 goto error;
809
810 return (PyObject*) element;
811
812 error:
813 Py_DECREF(element);
814 return NULL;
815}
816
Martin v. Löwisbce16662012-06-17 10:41:22 +0200817static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200818element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200819{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200820 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200821 Py_ssize_t result = sizeof(ElementObject);
822 if (self->extra) {
823 result += sizeof(ElementObjectExtra);
824 if (self->extra->children != self->extra->_children)
825 result += sizeof(PyObject*) * self->extra->allocated;
826 }
827 return PyLong_FromSsize_t(result);
828}
829
/* dict keys for getstate/setstate.  element_getstate() uses these as the
 * keys of the state dictionary it builds. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
836
837/* __getstate__ returns a fabricated instance dict as in the pure-Python
838 * Element implementation, for interoperability/interchangeability. This
839 * makes the pure-Python implementation details an API, but (a) there aren't
840 * any unnecessary structures there; and (b) it buys compatibility with 3.2
841 * pickles. See issue #16076.
842 */
843static PyObject *
844element_getstate(ElementObject *self)
845{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200846 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800847 PyObject *instancedict = NULL, *children;
848
849 /* Build a list of children. */
850 children = PyList_New(self->extra ? self->extra->length : 0);
851 if (!children)
852 return NULL;
853 for (i = 0; i < PyList_GET_SIZE(children); i++) {
854 PyObject *child = self->extra->children[i];
855 Py_INCREF(child);
856 PyList_SET_ITEM(children, i, child);
857 }
858
859 /* Construct the state object. */
860 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
861 if (noattrib)
862 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
863 PICKLED_TAG, self->tag,
864 PICKLED_CHILDREN, children,
865 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700866 PICKLED_TEXT, JOIN_OBJ(self->text),
867 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800868 else
869 instancedict = Py_BuildValue("{sOsOsOsOsO}",
870 PICKLED_TAG, self->tag,
871 PICKLED_CHILDREN, children,
872 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700873 PICKLED_TEXT, JOIN_OBJ(self->text),
874 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800875 if (instancedict) {
876 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800878 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800879 else {
880 for (i = 0; i < PyList_GET_SIZE(children); i++)
881 Py_DECREF(PyList_GET_ITEM(children, i));
882 Py_DECREF(children);
883
884 return NULL;
885 }
886}
887
888static PyObject *
889element_setstate_from_attributes(ElementObject *self,
890 PyObject *tag,
891 PyObject *attrib,
892 PyObject *text,
893 PyObject *tail,
894 PyObject *children)
895{
896 Py_ssize_t i, nchildren;
897
898 if (!tag) {
899 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
900 return NULL;
901 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800902
903 Py_CLEAR(self->tag);
904 self->tag = tag;
905 Py_INCREF(self->tag);
906
Eli Benderskydd3661e2013-09-13 06:24:25 -0700907 _clear_joined_ptr(&self->text);
908 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
909 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910
Eli Benderskydd3661e2013-09-13 06:24:25 -0700911 _clear_joined_ptr(&self->tail);
912 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
913 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914
915 /* Handle ATTRIB and CHILDREN. */
916 if (!children && !attrib)
917 Py_RETURN_NONE;
918
919 /* Compute 'nchildren'. */
920 if (children) {
921 if (!PyList_Check(children)) {
922 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
923 return NULL;
924 }
925 nchildren = PyList_Size(children);
926 }
927 else {
928 nchildren = 0;
929 }
930
931 /* Allocate 'extra'. */
932 if (element_resize(self, nchildren)) {
933 return NULL;
934 }
935 assert(self->extra && self->extra->allocated >= nchildren);
936
937 /* Copy children */
938 for (i = 0; i < nchildren; i++) {
939 self->extra->children[i] = PyList_GET_ITEM(children, i);
940 Py_INCREF(self->extra->children[i]);
941 }
942
943 self->extra->length = nchildren;
944 self->extra->allocated = nchildren;
945
946 /* Stash attrib. */
947 if (attrib) {
948 Py_CLEAR(self->extra->attrib);
949 self->extra->attrib = attrib;
950 Py_INCREF(attrib);
951 }
952
953 Py_RETURN_NONE;
954}
955
956/* __setstate__ for Element instance from the Python implementation.
957 * 'state' should be the instance dict.
958 */
959static PyObject *
960element_setstate_from_Python(ElementObject *self, PyObject *state)
961{
962 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
963 PICKLED_TAIL, PICKLED_CHILDREN, 0};
964 PyObject *args;
965 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800966 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968 tag = attrib = text = tail = children = NULL;
969 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800970 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800971 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800972
973 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
974 &attrib, &text, &tail, &children))
975 retval = element_setstate_from_attributes(self, tag, attrib, text,
976 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800978 retval = NULL;
979
980 Py_DECREF(args);
981 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800982}
983
984static PyObject *
985element_setstate(ElementObject *self, PyObject *state)
986{
987 if (!PyDict_CheckExact(state)) {
988 PyErr_Format(PyExc_TypeError,
989 "Don't know how to unpickle \"%.200R\" as an Element",
990 state);
991 return NULL;
992 }
993 else
994 return element_setstate_from_Python(self, state);
995}
996
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000997LOCAL(int)
998checkpath(PyObject* tag)
999{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001000 Py_ssize_t i;
1001 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001002
1003 /* check if a tag contains an xpath character */
1004
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001005#define PATHCHAR(ch) \
1006 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001009 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1010 void *data = PyUnicode_DATA(tag);
1011 unsigned int kind = PyUnicode_KIND(tag);
1012 for (i = 0; i < len; i++) {
1013 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1014 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001015 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001016 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001018 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001019 return 1;
1020 }
1021 return 0;
1022 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001023 if (PyBytes_Check(tag)) {
1024 char *p = PyBytes_AS_STRING(tag);
1025 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001026 if (p[i] == '{')
1027 check = 0;
1028 else if (p[i] == '}')
1029 check = 1;
1030 else if (check && PATHCHAR(p[i]))
1031 return 1;
1032 }
1033 return 0;
1034 }
1035
1036 return 1; /* unknown type; might be path expression */
1037}
1038
1039static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001040element_extend(ElementObject* self, PyObject* args)
1041{
1042 PyObject* seq;
1043 Py_ssize_t i, seqlen = 0;
1044
1045 PyObject* seq_in;
1046 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1047 return NULL;
1048
1049 seq = PySequence_Fast(seq_in, "");
1050 if (!seq) {
1051 PyErr_Format(
1052 PyExc_TypeError,
1053 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1054 );
1055 return NULL;
1056 }
1057
1058 seqlen = PySequence_Size(seq);
1059 for (i = 0; i < seqlen; i++) {
1060 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001061 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1062 Py_DECREF(seq);
1063 PyErr_Format(
1064 PyExc_TypeError,
1065 "expected an Element, not \"%.200s\"",
1066 Py_TYPE(element)->tp_name);
1067 return NULL;
1068 }
1069
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001070 if (element_add_subelement(self, element) < 0) {
1071 Py_DECREF(seq);
1072 return NULL;
1073 }
1074 }
1075
1076 Py_DECREF(seq);
1077
1078 Py_RETURN_NONE;
1079}
1080
1081static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001082element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001083{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001084 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001085 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001086 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001087 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001088 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001089
Eli Bendersky737b1732012-05-29 06:02:56 +03001090 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1091 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return NULL;
1093
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001094 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001095 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001096 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001097 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001098 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001099 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001100
1101 if (!self->extra)
1102 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104 for (i = 0; i < self->extra->length; i++) {
1105 PyObject* item = self->extra->children[i];
1106 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001107 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108 Py_INCREF(item);
1109 return item;
1110 }
1111 }
1112
1113 Py_RETURN_NONE;
1114}
1115
1116static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001117element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001119 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120 PyObject* tag;
1121 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001123 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001124 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001125 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001126
Eli Bendersky737b1732012-05-29 06:02:56 +03001127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1128 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129 return NULL;
1130
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001132 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001133 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001134 );
1135
1136 if (!self->extra) {
1137 Py_INCREF(default_value);
1138 return default_value;
1139 }
1140
1141 for (i = 0; i < self->extra->length; i++) {
1142 ElementObject* item = (ElementObject*) self->extra->children[i];
Eli Bendersky163d7f02013-11-24 06:55:04 -08001143 if (Element_CheckExact(item) &&
1144 (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 PyObject* text = element_get_text(item);
1146 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001147 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001148 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 return text;
1150 }
1151 }
1152
1153 Py_INCREF(default_value);
1154 return default_value;
1155}
1156
1157static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001158element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001160 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001163 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001164 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001165 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001166
Eli Bendersky737b1732012-05-29 06:02:56 +03001167 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1168 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001169 return NULL;
1170
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001171 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001172 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001173 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001175 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001176 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001177
1178 out = PyList_New(0);
1179 if (!out)
1180 return NULL;
1181
1182 if (!self->extra)
1183 return out;
1184
1185 for (i = 0; i < self->extra->length; i++) {
1186 PyObject* item = self->extra->children[i];
1187 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001188 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189 if (PyList_Append(out, item) < 0) {
1190 Py_DECREF(out);
1191 return NULL;
1192 }
1193 }
1194 }
1195
1196 return out;
1197}
1198
1199static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001200element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001201{
1202 PyObject* tag;
1203 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001204 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001205 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001206 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001207
Eli Bendersky737b1732012-05-29 06:02:56 +03001208 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001209 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001210 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001211 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001213 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001214 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001215}
1216
1217static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001218element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219{
1220 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001221 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222
1223 PyObject* key;
1224 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001225
1226 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1227 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001228 return NULL;
1229
1230 if (!self->extra || self->extra->attrib == Py_None)
1231 value = default_value;
1232 else {
1233 value = PyDict_GetItem(self->extra->attrib, key);
1234 if (!value)
1235 value = default_value;
1236 }
1237
1238 Py_INCREF(value);
1239 return value;
1240}
1241
1242static PyObject*
1243element_getchildren(ElementObject* self, PyObject* args)
1244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* list;
1247
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001248 /* FIXME: report as deprecated? */
1249
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 if (!PyArg_ParseTuple(args, ":getchildren"))
1251 return NULL;
1252
1253 if (!self->extra)
1254 return PyList_New(0);
1255
1256 list = PyList_New(self->extra->length);
1257 if (!list)
1258 return NULL;
1259
1260 for (i = 0; i < self->extra->length; i++) {
1261 PyObject* item = self->extra->children[i];
1262 Py_INCREF(item);
1263 PyList_SET_ITEM(list, i, item);
1264 }
1265
1266 return list;
1267}
1268
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001269
Eli Bendersky64d11e62012-06-15 07:42:50 +03001270static PyObject *
1271create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1272
1273
1274static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001275element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001276{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001278 static char* kwlist[] = {"tag", 0};
1279
1280 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 return NULL;
1282
Eli Bendersky64d11e62012-06-15 07:42:50 +03001283 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001284}
1285
1286
1287static PyObject*
1288element_itertext(ElementObject* self, PyObject* args)
1289{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001290 if (!PyArg_ParseTuple(args, ":itertext"))
1291 return NULL;
1292
Eli Bendersky64d11e62012-06-15 07:42:50 +03001293 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001294}
1295
Eli Bendersky64d11e62012-06-15 07:42:50 +03001296
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001298element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001299{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001300 ElementObject* self = (ElementObject*) self_;
1301
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 if (!self->extra || index < 0 || index >= self->extra->length) {
1303 PyErr_SetString(
1304 PyExc_IndexError,
1305 "child index out of range"
1306 );
1307 return NULL;
1308 }
1309
1310 Py_INCREF(self->extra->children[index]);
1311 return self->extra->children[index];
1312}
1313
1314static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315element_insert(ElementObject* self, PyObject* args)
1316{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001317 Py_ssize_t index, i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyObject* element;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001319 if (!PyArg_ParseTuple(args, "nO!:insert", &index,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320 &Element_Type, &element))
1321 return NULL;
1322
Victor Stinner5f0af232013-07-11 23:01:36 +02001323 if (!self->extra) {
1324 if (create_extra(self, NULL) < 0)
1325 return NULL;
1326 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001327
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328 if (index < 0) {
1329 index += self->extra->length;
1330 if (index < 0)
1331 index = 0;
1332 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001333 if (index > self->extra->length)
1334 index = self->extra->length;
1335
1336 if (element_resize(self, 1) < 0)
1337 return NULL;
1338
1339 for (i = self->extra->length; i > index; i--)
1340 self->extra->children[i] = self->extra->children[i-1];
1341
1342 Py_INCREF(element);
1343 self->extra->children[index] = element;
1344
1345 self->extra->length++;
1346
1347 Py_RETURN_NONE;
1348}
1349
1350static PyObject*
1351element_items(ElementObject* self, PyObject* args)
1352{
1353 if (!PyArg_ParseTuple(args, ":items"))
1354 return NULL;
1355
1356 if (!self->extra || self->extra->attrib == Py_None)
1357 return PyList_New(0);
1358
1359 return PyDict_Items(self->extra->attrib);
1360}
1361
1362static PyObject*
1363element_keys(ElementObject* self, PyObject* args)
1364{
1365 if (!PyArg_ParseTuple(args, ":keys"))
1366 return NULL;
1367
1368 if (!self->extra || self->extra->attrib == Py_None)
1369 return PyList_New(0);
1370
1371 return PyDict_Keys(self->extra->attrib);
1372}
1373
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001375element_length(ElementObject* self)
1376{
1377 if (!self->extra)
1378 return 0;
1379
1380 return self->extra->length;
1381}
1382
1383static PyObject*
1384element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1385{
1386 PyObject* elem;
1387
1388 PyObject* tag;
1389 PyObject* attrib;
1390 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1391 return NULL;
1392
1393 attrib = PyDict_Copy(attrib);
1394 if (!attrib)
1395 return NULL;
1396
Eli Bendersky092af1f2012-03-04 07:14:03 +02001397 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001398
1399 Py_DECREF(attrib);
1400
1401 return elem;
1402}
1403
1404static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001405element_remove(ElementObject* self, PyObject* args)
1406{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001407 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408
1409 PyObject* element;
1410 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1411 return NULL;
1412
1413 if (!self->extra) {
1414 /* element has no children, so raise exception */
1415 PyErr_SetString(
1416 PyExc_ValueError,
1417 "list.remove(x): x not in list"
1418 );
1419 return NULL;
1420 }
1421
1422 for (i = 0; i < self->extra->length; i++) {
1423 if (self->extra->children[i] == element)
1424 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001425 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426 break;
1427 }
1428
1429 if (i == self->extra->length) {
1430 /* element is not in children, so raise exception */
1431 PyErr_SetString(
1432 PyExc_ValueError,
1433 "list.remove(x): x not in list"
1434 );
1435 return NULL;
1436 }
1437
1438 Py_DECREF(self->extra->children[i]);
1439
1440 self->extra->length--;
1441
1442 for (; i < self->extra->length; i++)
1443 self->extra->children[i] = self->extra->children[i+1];
1444
1445 Py_RETURN_NONE;
1446}
1447
1448static PyObject*
1449element_repr(ElementObject* self)
1450{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001451 if (self->tag)
1452 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1453 else
1454 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455}
1456
1457static PyObject*
1458element_set(ElementObject* self, PyObject* args)
1459{
1460 PyObject* attrib;
1461
1462 PyObject* key;
1463 PyObject* value;
1464 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1465 return NULL;
1466
Victor Stinner5f0af232013-07-11 23:01:36 +02001467 if (!self->extra) {
1468 if (create_extra(self, NULL) < 0)
1469 return NULL;
1470 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001471
1472 attrib = element_get_attrib(self);
1473 if (!attrib)
1474 return NULL;
1475
1476 if (PyDict_SetItem(attrib, key, value) < 0)
1477 return NULL;
1478
1479 Py_RETURN_NONE;
1480}
1481
1482static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001483element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001485 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001486 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487 PyObject* old;
1488
1489 if (!self->extra || index < 0 || index >= self->extra->length) {
1490 PyErr_SetString(
1491 PyExc_IndexError,
1492 "child assignment index out of range");
1493 return -1;
1494 }
1495
1496 old = self->extra->children[index];
1497
1498 if (item) {
1499 Py_INCREF(item);
1500 self->extra->children[index] = item;
1501 } else {
1502 self->extra->length--;
1503 for (i = index; i < self->extra->length; i++)
1504 self->extra->children[i] = self->extra->children[i+1];
1505 }
1506
1507 Py_DECREF(old);
1508
1509 return 0;
1510}
1511
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001512static PyObject*
1513element_subscr(PyObject* self_, PyObject* item)
1514{
1515 ElementObject* self = (ElementObject*) self_;
1516
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001517 if (PyIndex_Check(item)) {
1518 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001519
1520 if (i == -1 && PyErr_Occurred()) {
1521 return NULL;
1522 }
1523 if (i < 0 && self->extra)
1524 i += self->extra->length;
1525 return element_getitem(self_, i);
1526 }
1527 else if (PySlice_Check(item)) {
1528 Py_ssize_t start, stop, step, slicelen, cur, i;
1529 PyObject* list;
1530
1531 if (!self->extra)
1532 return PyList_New(0);
1533
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001534 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001535 self->extra->length,
1536 &start, &stop, &step, &slicelen) < 0) {
1537 return NULL;
1538 }
1539
1540 if (slicelen <= 0)
1541 return PyList_New(0);
1542 else {
1543 list = PyList_New(slicelen);
1544 if (!list)
1545 return NULL;
1546
1547 for (cur = start, i = 0; i < slicelen;
1548 cur += step, i++) {
1549 PyObject* item = self->extra->children[cur];
1550 Py_INCREF(item);
1551 PyList_SET_ITEM(list, i, item);
1552 }
1553
1554 return list;
1555 }
1556 }
1557 else {
1558 PyErr_SetString(PyExc_TypeError,
1559 "element indices must be integers");
1560 return NULL;
1561 }
1562}
1563
1564static int
1565element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1566{
1567 ElementObject* self = (ElementObject*) self_;
1568
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001569 if (PyIndex_Check(item)) {
1570 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001571
1572 if (i == -1 && PyErr_Occurred()) {
1573 return -1;
1574 }
1575 if (i < 0 && self->extra)
1576 i += self->extra->length;
1577 return element_setitem(self_, i, value);
1578 }
1579 else if (PySlice_Check(item)) {
1580 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1581
1582 PyObject* recycle = NULL;
1583 PyObject* seq = NULL;
1584
Victor Stinner5f0af232013-07-11 23:01:36 +02001585 if (!self->extra) {
1586 if (create_extra(self, NULL) < 0)
1587 return -1;
1588 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001589
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001590 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001591 self->extra->length,
1592 &start, &stop, &step, &slicelen) < 0) {
1593 return -1;
1594 }
1595
Eli Bendersky865756a2012-03-09 13:38:15 +02001596 if (value == NULL) {
1597 /* Delete slice */
1598 size_t cur;
1599 Py_ssize_t i;
1600
1601 if (slicelen <= 0)
1602 return 0;
1603
1604 /* Since we're deleting, the direction of the range doesn't matter,
1605 * so for simplicity make it always ascending.
1606 */
1607 if (step < 0) {
1608 stop = start + 1;
1609 start = stop + step * (slicelen - 1) - 1;
1610 step = -step;
1611 }
1612
1613 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1614
1615 /* recycle is a list that will contain all the children
1616 * scheduled for removal.
1617 */
1618 if (!(recycle = PyList_New(slicelen))) {
1619 PyErr_NoMemory();
1620 return -1;
1621 }
1622
1623 /* This loop walks over all the children that have to be deleted,
1624 * with cur pointing at them. num_moved is the amount of children
1625 * until the next deleted child that have to be "shifted down" to
1626 * occupy the deleted's places.
1627 * Note that in the ith iteration, shifting is done i+i places down
1628 * because i children were already removed.
1629 */
1630 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1631 /* Compute how many children have to be moved, clipping at the
1632 * list end.
1633 */
1634 Py_ssize_t num_moved = step - 1;
1635 if (cur + step >= (size_t)self->extra->length) {
1636 num_moved = self->extra->length - cur - 1;
1637 }
1638
1639 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1640
1641 memmove(
1642 self->extra->children + cur - i,
1643 self->extra->children + cur + 1,
1644 num_moved * sizeof(PyObject *));
1645 }
1646
1647 /* Leftover "tail" after the last removed child */
1648 cur = start + (size_t)slicelen * step;
1649 if (cur < (size_t)self->extra->length) {
1650 memmove(
1651 self->extra->children + cur - slicelen,
1652 self->extra->children + cur,
1653 (self->extra->length - cur) * sizeof(PyObject *));
1654 }
1655
1656 self->extra->length -= slicelen;
1657
1658 /* Discard the recycle list with all the deleted sub-elements */
1659 Py_XDECREF(recycle);
1660 return 0;
1661 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001662 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001663 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001664 seq = PySequence_Fast(value, "");
1665 if (!seq) {
1666 PyErr_Format(
1667 PyExc_TypeError,
1668 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1669 );
1670 return -1;
1671 }
1672 newlen = PySequence_Size(seq);
1673 }
1674
1675 if (step != 1 && newlen != slicelen)
1676 {
1677 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001678 "attempt to assign sequence of size %zd "
1679 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001680 newlen, slicelen
1681 );
1682 return -1;
1683 }
1684
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001685 /* Resize before creating the recycle bin, to prevent refleaks. */
1686 if (newlen > slicelen) {
1687 if (element_resize(self, newlen - slicelen) < 0) {
1688 if (seq) {
1689 Py_DECREF(seq);
1690 }
1691 return -1;
1692 }
1693 }
1694
1695 if (slicelen > 0) {
1696 /* to avoid recursive calls to this method (via decref), move
1697 old items to the recycle bin here, and get rid of them when
1698 we're done modifying the element */
1699 recycle = PyList_New(slicelen);
1700 if (!recycle) {
1701 if (seq) {
1702 Py_DECREF(seq);
1703 }
1704 return -1;
1705 }
1706 for (cur = start, i = 0; i < slicelen;
1707 cur += step, i++)
1708 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1709 }
1710
1711 if (newlen < slicelen) {
1712 /* delete slice */
1713 for (i = stop; i < self->extra->length; i++)
1714 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1715 } else if (newlen > slicelen) {
1716 /* insert slice */
1717 for (i = self->extra->length-1; i >= stop; i--)
1718 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1719 }
1720
1721 /* replace the slice */
1722 for (cur = start, i = 0; i < newlen;
1723 cur += step, i++) {
1724 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1725 Py_INCREF(element);
1726 self->extra->children[cur] = element;
1727 }
1728
1729 self->extra->length += newlen - slicelen;
1730
1731 if (seq) {
1732 Py_DECREF(seq);
1733 }
1734
1735 /* discard the recycle bin, and everything in it */
1736 Py_XDECREF(recycle);
1737
1738 return 0;
1739 }
1740 else {
1741 PyErr_SetString(PyExc_TypeError,
1742 "element indices must be integers");
1743 return -1;
1744 }
1745}
1746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001747static PyMethodDef element_methods[] = {
1748
Eli Bendersky0192ba32012-03-30 16:38:33 +03001749 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001750
Eli Benderskya8736902013-01-05 06:26:39 -08001751 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001752 {"set", (PyCFunction) element_set, METH_VARARGS},
1753
Eli Bendersky737b1732012-05-29 06:02:56 +03001754 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1755 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1756 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001757
1758 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001759 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001760 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1761 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1762
Eli Benderskya8736902013-01-05 06:26:39 -08001763 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001764 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001765 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766
Eli Benderskya8736902013-01-05 06:26:39 -08001767 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001768 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1769
1770 {"items", (PyCFunction) element_items, METH_VARARGS},
1771 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1772
1773 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1774
1775 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1776 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001777 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001778 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1779 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001781 {NULL, NULL}
1782};
1783
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001785element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001786{
1787 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001788 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001790 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001791 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001792
Alexander Belopolskye239d232010-12-08 23:31:48 +00001793 if (name == NULL)
1794 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001795
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796 /* handle common attributes first */
1797 if (strcmp(name, "tag") == 0) {
1798 res = self->tag;
1799 Py_INCREF(res);
1800 return res;
1801 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001802 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001803 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001804 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 }
1806
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001807 /* methods */
1808 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1809 if (res)
1810 return res;
1811
1812 /* less common attributes */
1813 if (strcmp(name, "tail") == 0) {
1814 PyErr_Clear();
1815 res = element_get_tail(self);
1816 } else if (strcmp(name, "attrib") == 0) {
1817 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001818 if (!self->extra) {
1819 if (create_extra(self, NULL) < 0)
1820 return NULL;
1821 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001822 res = element_get_attrib(self);
1823 }
1824
1825 if (!res)
1826 return NULL;
1827
1828 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001829 return res;
1830}
1831
Eli Benderskyef9683b2013-05-18 07:52:34 -07001832static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001833element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001834{
Eli Benderskyb20df952012-05-20 06:33:29 +03001835 char *name = "";
1836 if (PyUnicode_Check(nameobj))
1837 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001838 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001839 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001840
1841 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 Py_DECREF(self->tag);
1843 self->tag = value;
1844 Py_INCREF(self->tag);
1845 } else if (strcmp(name, "text") == 0) {
1846 Py_DECREF(JOIN_OBJ(self->text));
1847 self->text = value;
1848 Py_INCREF(self->text);
1849 } else if (strcmp(name, "tail") == 0) {
1850 Py_DECREF(JOIN_OBJ(self->tail));
1851 self->tail = value;
1852 Py_INCREF(self->tail);
1853 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001854 if (!self->extra) {
1855 if (create_extra(self, NULL) < 0)
1856 return -1;
1857 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001858 Py_DECREF(self->extra->attrib);
1859 self->extra->attrib = value;
1860 Py_INCREF(self->extra->attrib);
1861 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001862 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001863 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001864 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865 }
1866
Eli Benderskyef9683b2013-05-18 07:52:34 -07001867 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001868}
1869
1870static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001871 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001872 0, /* sq_concat */
1873 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001877 0,
1878};
1879
1880static PyMappingMethods element_as_mapping = {
1881 (lenfunc) element_length,
1882 (binaryfunc) element_subscr,
1883 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001884};
1885
Neal Norwitz227b5332006-03-22 09:28:35 +00001886static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001887 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001888 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001889 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001890 (destructor)element_dealloc, /* tp_dealloc */
1891 0, /* tp_print */
1892 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001893 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001894 0, /* tp_reserved */
1895 (reprfunc)element_repr, /* tp_repr */
1896 0, /* tp_as_number */
1897 &element_as_sequence, /* tp_as_sequence */
1898 &element_as_mapping, /* tp_as_mapping */
1899 0, /* tp_hash */
1900 0, /* tp_call */
1901 0, /* tp_str */
1902 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001903 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001904 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001905 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1906 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001907 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001908 (traverseproc)element_gc_traverse, /* tp_traverse */
1909 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001910 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001911 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001912 0, /* tp_iter */
1913 0, /* tp_iternext */
1914 element_methods, /* tp_methods */
1915 0, /* tp_members */
1916 0, /* tp_getset */
1917 0, /* tp_base */
1918 0, /* tp_dict */
1919 0, /* tp_descr_get */
1920 0, /* tp_descr_set */
1921 0, /* tp_dictoffset */
1922 (initproc)element_init, /* tp_init */
1923 PyType_GenericAlloc, /* tp_alloc */
1924 element_new, /* tp_new */
1925 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926};
1927
Eli Bendersky64d11e62012-06-15 07:42:50 +03001928/******************************* Element iterator ****************************/
1929
1930/* ElementIterObject represents the iteration state over an XML element in
1931 * pre-order traversal. To keep track of which sub-element should be returned
1932 * next, a stack of parents is maintained. This is a standard stack-based
1933 * iterative pre-order traversal of a tree.
1934 * The stack is managed using a single-linked list starting at parent_stack.
1935 * Each stack node contains the saved parent to which we should return after
1936 * the current one is exhausted, and the next child to examine in that parent.
1937 */
1938typedef struct ParentLocator_t {
1939 ElementObject *parent;
1940 Py_ssize_t child_index;
1941 struct ParentLocator_t *next;
1942} ParentLocator;
1943
1944typedef struct {
1945 PyObject_HEAD
1946 ParentLocator *parent_stack;
1947 ElementObject *root_element;
1948 PyObject *sought_tag;
1949 int root_done;
1950 int gettext;
1951} ElementIterObject;
1952
1953
1954static void
1955elementiter_dealloc(ElementIterObject *it)
1956{
1957 ParentLocator *p = it->parent_stack;
1958 while (p) {
1959 ParentLocator *temp = p;
1960 Py_XDECREF(p->parent);
1961 p = p->next;
1962 PyObject_Free(temp);
1963 }
1964
1965 Py_XDECREF(it->sought_tag);
1966 Py_XDECREF(it->root_element);
1967
1968 PyObject_GC_UnTrack(it);
1969 PyObject_GC_Del(it);
1970}
1971
1972static int
1973elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1974{
1975 ParentLocator *p = it->parent_stack;
1976 while (p) {
1977 Py_VISIT(p->parent);
1978 p = p->next;
1979 }
1980
1981 Py_VISIT(it->root_element);
1982 Py_VISIT(it->sought_tag);
1983 return 0;
1984}
1985
1986/* Helper function for elementiter_next. Add a new parent to the parent stack.
1987 */
1988static ParentLocator *
1989parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1990{
1991 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1992 if (new_node) {
1993 new_node->parent = parent;
1994 Py_INCREF(parent);
1995 new_node->child_index = 0;
1996 new_node->next = stack;
1997 }
1998 return new_node;
1999}
2000
2001static PyObject *
2002elementiter_next(ElementIterObject *it)
2003{
2004 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002005 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002006 * A short note on gettext: this function serves both the iter() and
2007 * itertext() methods to avoid code duplication. However, there are a few
2008 * small differences in the way these iterations work. Namely:
2009 * - itertext() only yields text from nodes that have it, and continues
2010 * iterating when a node doesn't have text (so it doesn't return any
2011 * node like iter())
2012 * - itertext() also has to handle tail, after finishing with all the
2013 * children of a node.
2014 */
Eli Bendersky113da642012-06-15 07:52:49 +03002015 ElementObject *cur_parent;
2016 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017
2018 while (1) {
2019 /* Handle the case reached in the beginning and end of iteration, where
2020 * the parent stack is empty. The root_done flag gives us indication
2021 * whether we've just started iterating (so root_done is 0), in which
2022 * case the root is returned. If root_done is 1 and we're here, the
2023 * iterator is exhausted.
2024 */
2025 if (!it->parent_stack->parent) {
2026 if (it->root_done) {
2027 PyErr_SetNone(PyExc_StopIteration);
2028 return NULL;
2029 } else {
2030 it->parent_stack = parent_stack_push_new(it->parent_stack,
2031 it->root_element);
2032 if (!it->parent_stack) {
2033 PyErr_NoMemory();
2034 return NULL;
2035 }
2036
2037 it->root_done = 1;
2038 if (it->sought_tag == Py_None ||
2039 PyObject_RichCompareBool(it->root_element->tag,
2040 it->sought_tag, Py_EQ) == 1) {
2041 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002042 PyObject *text = element_get_text(it->root_element);
2043 if (!text)
2044 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002045 if (PyObject_IsTrue(text)) {
2046 Py_INCREF(text);
2047 return text;
2048 }
2049 } else {
2050 Py_INCREF(it->root_element);
2051 return (PyObject *)it->root_element;
2052 }
2053 }
2054 }
2055 }
2056
2057 /* See if there are children left to traverse in the current parent. If
2058 * yes, visit the next child. If not, pop the stack and try again.
2059 */
Eli Bendersky113da642012-06-15 07:52:49 +03002060 cur_parent = it->parent_stack->parent;
2061 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002062 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2063 ElementObject *child = (ElementObject *)
2064 cur_parent->extra->children[child_index];
2065 it->parent_stack->child_index++;
2066 it->parent_stack = parent_stack_push_new(it->parent_stack,
2067 child);
2068 if (!it->parent_stack) {
2069 PyErr_NoMemory();
2070 return NULL;
2071 }
2072
2073 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002074 PyObject *text = element_get_text(child);
2075 if (!text)
2076 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002077 if (PyObject_IsTrue(text)) {
2078 Py_INCREF(text);
2079 return text;
2080 }
2081 } else if (it->sought_tag == Py_None ||
2082 PyObject_RichCompareBool(child->tag,
2083 it->sought_tag, Py_EQ) == 1) {
2084 Py_INCREF(child);
2085 return (PyObject *)child;
2086 }
2087 else
2088 continue;
2089 }
2090 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002091 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002093 if (it->gettext) {
2094 tail = element_get_tail(cur_parent);
2095 if (!tail)
2096 return NULL;
2097 }
2098 else
2099 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002100 Py_XDECREF(it->parent_stack->parent);
2101 PyObject_Free(it->parent_stack);
2102 it->parent_stack = next;
2103
2104 /* Note that extra condition on it->parent_stack->parent here;
2105 * this is because itertext() is supposed to only return *inner*
2106 * text, not text following the element it began iteration with.
2107 */
2108 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2109 Py_INCREF(tail);
2110 return tail;
2111 }
2112 }
2113 }
2114
2115 return NULL;
2116}
2117
2118
2119static PyTypeObject ElementIter_Type = {
2120 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002121 /* Using the module's name since the pure-Python implementation does not
2122 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123 "_elementtree._element_iterator", /* tp_name */
2124 sizeof(ElementIterObject), /* tp_basicsize */
2125 0, /* tp_itemsize */
2126 /* methods */
2127 (destructor)elementiter_dealloc, /* tp_dealloc */
2128 0, /* tp_print */
2129 0, /* tp_getattr */
2130 0, /* tp_setattr */
2131 0, /* tp_reserved */
2132 0, /* tp_repr */
2133 0, /* tp_as_number */
2134 0, /* tp_as_sequence */
2135 0, /* tp_as_mapping */
2136 0, /* tp_hash */
2137 0, /* tp_call */
2138 0, /* tp_str */
2139 0, /* tp_getattro */
2140 0, /* tp_setattro */
2141 0, /* tp_as_buffer */
2142 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2143 0, /* tp_doc */
2144 (traverseproc)elementiter_traverse, /* tp_traverse */
2145 0, /* tp_clear */
2146 0, /* tp_richcompare */
2147 0, /* tp_weaklistoffset */
2148 PyObject_SelfIter, /* tp_iter */
2149 (iternextfunc)elementiter_next, /* tp_iternext */
2150 0, /* tp_methods */
2151 0, /* tp_members */
2152 0, /* tp_getset */
2153 0, /* tp_base */
2154 0, /* tp_dict */
2155 0, /* tp_descr_get */
2156 0, /* tp_descr_set */
2157 0, /* tp_dictoffset */
2158 0, /* tp_init */
2159 0, /* tp_alloc */
2160 0, /* tp_new */
2161};
2162
2163
2164static PyObject *
2165create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2166{
2167 ElementIterObject *it;
2168 PyObject *star = NULL;
2169
2170 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2171 if (!it)
2172 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002173
2174 if (PyUnicode_Check(tag))
2175 star = PyUnicode_FromString("*");
2176 else if (PyBytes_Check(tag))
2177 star = PyBytes_FromString("*");
2178
2179 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2180 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002182
2183 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002184 it->sought_tag = tag;
2185 it->root_done = 0;
2186 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002187 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 it->root_element = self;
2189
Eli Bendersky64d11e62012-06-15 07:42:50 +03002190 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002191
2192 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2193 if (it->parent_stack == NULL) {
2194 Py_DECREF(it);
2195 PyErr_NoMemory();
2196 return NULL;
2197 }
2198 it->parent_stack->parent = NULL;
2199 it->parent_stack->child_index = 0;
2200 it->parent_stack->next = NULL;
2201
Eli Bendersky64d11e62012-06-15 07:42:50 +03002202 return (PyObject *)it;
2203}
2204
2205
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002206/* ==================================================================== */
2207/* the tree builder type */
2208
2209typedef struct {
2210 PyObject_HEAD
2211
Eli Bendersky58d548d2012-05-29 15:45:16 +03002212 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002213
Antoine Pitrouee329312012-10-04 19:53:29 +02002214 PyObject *this; /* current node */
2215 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002216
Eli Bendersky58d548d2012-05-29 15:45:16 +03002217 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002218
Eli Bendersky58d548d2012-05-29 15:45:16 +03002219 PyObject *stack; /* element stack */
2220 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002221
Eli Bendersky48d358b2012-05-30 17:57:50 +03002222 PyObject *element_factory;
2223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002225 PyObject *events; /* list of events, or NULL if not collecting */
2226 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2227 PyObject *end_event_obj;
2228 PyObject *start_ns_event_obj;
2229 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002230} TreeBuilderObject;
2231
/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002233
2234/* -------------------------------------------------------------------- */
2235/* constructor and destructor */
2236
Eli Bendersky58d548d2012-05-29 15:45:16 +03002237static PyObject *
2238treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002239{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002240 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2241 if (t != NULL) {
2242 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243
Eli Bendersky58d548d2012-05-29 15:45:16 +03002244 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002245 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002246 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002247 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248
Eli Bendersky58d548d2012-05-29 15:45:16 +03002249 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002250 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002251 t->stack = PyList_New(20);
2252 if (!t->stack) {
2253 Py_DECREF(t->this);
2254 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002255 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002256 return NULL;
2257 }
2258 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002259
Eli Bendersky58d548d2012-05-29 15:45:16 +03002260 t->events = NULL;
2261 t->start_event_obj = t->end_event_obj = NULL;
2262 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2263 }
2264 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002265}
2266
Eli Bendersky58d548d2012-05-29 15:45:16 +03002267static int
2268treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002269{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002270 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002271 PyObject *element_factory = NULL;
2272 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002273 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002274
2275 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2276 &element_factory)) {
2277 return -1;
2278 }
2279
2280 if (element_factory) {
2281 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002282 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002283 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002284 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002285 }
2286
Eli Bendersky58d548d2012-05-29 15:45:16 +03002287 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288}
2289
Eli Bendersky48d358b2012-05-30 17:57:50 +03002290static int
2291treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2292{
2293 Py_VISIT(self->root);
2294 Py_VISIT(self->this);
2295 Py_VISIT(self->last);
2296 Py_VISIT(self->data);
2297 Py_VISIT(self->stack);
2298 Py_VISIT(self->element_factory);
2299 return 0;
2300}
2301
2302static int
2303treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002304{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002305 Py_CLEAR(self->end_ns_event_obj);
2306 Py_CLEAR(self->start_ns_event_obj);
2307 Py_CLEAR(self->end_event_obj);
2308 Py_CLEAR(self->start_event_obj);
2309 Py_CLEAR(self->events);
2310 Py_CLEAR(self->stack);
2311 Py_CLEAR(self->data);
2312 Py_CLEAR(self->last);
2313 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002314 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002315 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002316 return 0;
2317}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319static void
2320treebuilder_dealloc(TreeBuilderObject *self)
2321{
2322 PyObject_GC_UnTrack(self);
2323 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002324 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325}
2326
2327/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002328/* helpers for handling of arbitrary element-like objects */
2329
2330static int
2331treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2332 PyObject **dest, _Py_Identifier *name)
2333{
2334 if (Element_CheckExact(element)) {
2335 Py_DECREF(JOIN_OBJ(*dest));
2336 *dest = JOIN_SET(data, PyList_CheckExact(data));
2337 return 0;
2338 }
2339 else {
2340 PyObject *joined = list_join(data);
2341 int r;
2342 if (joined == NULL)
2343 return -1;
2344 r = _PyObject_SetAttrId(element, name, joined);
2345 Py_DECREF(joined);
2346 return r;
2347 }
2348}
2349
2350/* These two functions steal a reference to data */
2351static int
2352treebuilder_set_element_text(PyObject *element, PyObject *data)
2353{
2354 _Py_IDENTIFIER(text);
2355 return treebuilder_set_element_text_or_tail(
2356 element, data, &((ElementObject *) element)->text, &PyId_text);
2357}
2358
2359static int
2360treebuilder_set_element_tail(PyObject *element, PyObject *data)
2361{
2362 _Py_IDENTIFIER(tail);
2363 return treebuilder_set_element_text_or_tail(
2364 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2365}
2366
2367static int
2368treebuilder_add_subelement(PyObject *element, PyObject *child)
2369{
2370 _Py_IDENTIFIER(append);
2371 if (Element_CheckExact(element)) {
2372 ElementObject *elem = (ElementObject *) element;
2373 return element_add_subelement(elem, child);
2374 }
2375 else {
2376 PyObject *res;
2377 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2378 if (res == NULL)
2379 return -1;
2380 Py_DECREF(res);
2381 return 0;
2382 }
2383}
2384
2385/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386/* handlers */
2387
2388LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2390 PyObject* attrib)
2391{
2392 PyObject* node;
2393 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002394 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395
2396 if (self->data) {
2397 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002398 if (treebuilder_set_element_text(self->last, self->data))
2399 return NULL;
2400 }
2401 else {
2402 if (treebuilder_set_element_tail(self->last, self->data))
2403 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404 }
2405 self->data = NULL;
2406 }
2407
Eli Bendersky08231a92013-05-18 15:47:16 -07002408 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002409 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2410 } else {
2411 node = create_new_element(tag, attrib);
2412 }
2413 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002415 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416
Antoine Pitrouee329312012-10-04 19:53:29 +02002417 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
2419 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002420 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002421 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 } else {
2423 if (self->root) {
2424 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002425 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426 "multiple elements on top level"
2427 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002428 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 }
2430 Py_INCREF(node);
2431 self->root = node;
2432 }
2433
2434 if (self->index < PyList_GET_SIZE(self->stack)) {
2435 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002436 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437 Py_INCREF(this);
2438 } else {
2439 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002440 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441 }
2442 self->index++;
2443
2444 Py_DECREF(this);
2445 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002446 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447
2448 Py_DECREF(self->last);
2449 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002451
2452 if (self->start_event_obj) {
2453 PyObject* res;
2454 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002455 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 PyList_Append(self->events, res);
2458 Py_DECREF(res);
2459 } else
2460 PyErr_Clear(); /* FIXME: propagate error */
2461 }
2462
2463 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002464
2465 error:
2466 Py_DECREF(node);
2467 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002468}
2469
2470LOCAL(PyObject*)
2471treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2472{
2473 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002474 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002475 /* ignore calls to data before the first call to start */
2476 Py_RETURN_NONE;
2477 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478 /* store the first item as is */
2479 Py_INCREF(data); self->data = data;
2480 } else {
2481 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002482 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2483 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002484 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485 /* expat often generates single character data sections; handle
2486 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002487 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2488 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002490 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 } else if (PyList_CheckExact(self->data)) {
2492 if (PyList_Append(self->data, data) < 0)
2493 return NULL;
2494 } else {
2495 PyObject* list = PyList_New(2);
2496 if (!list)
2497 return NULL;
2498 PyList_SET_ITEM(list, 0, self->data);
2499 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2500 self->data = list;
2501 }
2502 }
2503
2504 Py_RETURN_NONE;
2505}
2506
2507LOCAL(PyObject*)
2508treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2509{
2510 PyObject* item;
2511
2512 if (self->data) {
2513 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002514 if (treebuilder_set_element_text(self->last, self->data))
2515 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002517 if (treebuilder_set_element_tail(self->last, self->data))
2518 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 }
2520 self->data = NULL;
2521 }
2522
2523 if (self->index == 0) {
2524 PyErr_SetString(
2525 PyExc_IndexError,
2526 "pop from empty stack"
2527 );
2528 return NULL;
2529 }
2530
2531 self->index--;
2532
2533 item = PyList_GET_ITEM(self->stack, self->index);
2534 Py_INCREF(item);
2535
2536 Py_DECREF(self->last);
2537
Antoine Pitrouee329312012-10-04 19:53:29 +02002538 self->last = self->this;
2539 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
2541 if (self->end_event_obj) {
2542 PyObject* res;
2543 PyObject* action = self->end_event_obj;
2544 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002545 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547 PyList_Append(self->events, res);
2548 Py_DECREF(res);
2549 } else
2550 PyErr_Clear(); /* FIXME: propagate error */
2551 }
2552
2553 Py_INCREF(self->last);
2554 return (PyObject*) self->last;
2555}
2556
2557LOCAL(void)
2558treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002559 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560{
2561 PyObject* res;
2562 PyObject* action;
2563 PyObject* parcel;
2564
2565 if (!self->events)
2566 return;
2567
2568 if (start) {
2569 if (!self->start_ns_event_obj)
2570 return;
2571 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002572 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 if (!parcel)
2574 return;
2575 Py_INCREF(action);
2576 } else {
2577 if (!self->end_ns_event_obj)
2578 return;
2579 action = self->end_ns_event_obj;
2580 Py_INCREF(action);
2581 parcel = Py_None;
2582 Py_INCREF(parcel);
2583 }
2584
2585 res = PyTuple_New(2);
2586
2587 if (res) {
2588 PyTuple_SET_ITEM(res, 0, action);
2589 PyTuple_SET_ITEM(res, 1, parcel);
2590 PyList_Append(self->events, res);
2591 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002592 }
2593 else {
2594 Py_DECREF(action);
2595 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598}
2599
2600/* -------------------------------------------------------------------- */
2601/* methods (in alphabetical order) */
2602
2603static PyObject*
2604treebuilder_data(TreeBuilderObject* self, PyObject* args)
2605{
2606 PyObject* data;
2607 if (!PyArg_ParseTuple(args, "O:data", &data))
2608 return NULL;
2609
2610 return treebuilder_handle_data(self, data);
2611}
2612
2613static PyObject*
2614treebuilder_end(TreeBuilderObject* self, PyObject* args)
2615{
2616 PyObject* tag;
2617 if (!PyArg_ParseTuple(args, "O:end", &tag))
2618 return NULL;
2619
2620 return treebuilder_handle_end(self, tag);
2621}
2622
2623LOCAL(PyObject*)
2624treebuilder_done(TreeBuilderObject* self)
2625{
2626 PyObject* res;
2627
2628 /* FIXME: check stack size? */
2629
2630 if (self->root)
2631 res = self->root;
2632 else
2633 res = Py_None;
2634
2635 Py_INCREF(res);
2636 return res;
2637}
2638
2639static PyObject*
2640treebuilder_close(TreeBuilderObject* self, PyObject* args)
2641{
2642 if (!PyArg_ParseTuple(args, ":close"))
2643 return NULL;
2644
2645 return treebuilder_done(self);
2646}
2647
2648static PyObject*
2649treebuilder_start(TreeBuilderObject* self, PyObject* args)
2650{
2651 PyObject* tag;
2652 PyObject* attrib = Py_None;
2653 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2654 return NULL;
2655
2656 return treebuilder_handle_start(self, tag, attrib);
2657}
2658
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659static PyMethodDef treebuilder_methods[] = {
2660 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2661 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2662 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2664 {NULL, NULL}
2665};
2666
Neal Norwitz227b5332006-03-22 09:28:35 +00002667static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002668 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002669 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002671 (destructor)treebuilder_dealloc, /* tp_dealloc */
2672 0, /* tp_print */
2673 0, /* tp_getattr */
2674 0, /* tp_setattr */
2675 0, /* tp_reserved */
2676 0, /* tp_repr */
2677 0, /* tp_as_number */
2678 0, /* tp_as_sequence */
2679 0, /* tp_as_mapping */
2680 0, /* tp_hash */
2681 0, /* tp_call */
2682 0, /* tp_str */
2683 0, /* tp_getattro */
2684 0, /* tp_setattro */
2685 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002686 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2687 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002688 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002689 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2690 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002691 0, /* tp_richcompare */
2692 0, /* tp_weaklistoffset */
2693 0, /* tp_iter */
2694 0, /* tp_iternext */
2695 treebuilder_methods, /* tp_methods */
2696 0, /* tp_members */
2697 0, /* tp_getset */
2698 0, /* tp_base */
2699 0, /* tp_dict */
2700 0, /* tp_descr_get */
2701 0, /* tp_descr_set */
2702 0, /* tp_dictoffset */
2703 (initproc)treebuilder_init, /* tp_init */
2704 PyType_GenericAlloc, /* tp_alloc */
2705 treebuilder_new, /* tp_new */
2706 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707};
2708
2709/* ==================================================================== */
2710/* the expat interface */
2711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002714
2715/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2716 * cached globally without being in per-module state.
2717 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002718static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720
Eli Bendersky52467b12012-06-01 07:13:08 +03002721static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2722 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724typedef struct {
2725 PyObject_HEAD
2726
2727 XML_Parser parser;
2728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *target;
2730 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002734 PyObject *handle_start;
2735 PyObject *handle_data;
2736 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002738 PyObject *handle_comment;
2739 PyObject *handle_pi;
2740 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002742 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744} XMLParserObject;
2745
/* True only when op's type is exactly XMLParser_Type (subclasses fail). */
#define XMLParser_CheckExact(op) (&XMLParser_Type == Py_TYPE(op))
2747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748/* helpers */
2749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750LOCAL(PyObject*)
2751makeuniversal(XMLParserObject* self, const char* string)
2752{
2753 /* convert a UTF-8 tag/attribute name from the expat parser
2754 to a universal name string */
2755
Antoine Pitrouc1948842012-10-01 23:40:37 +02002756 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 PyObject* key;
2758 PyObject* value;
2759
2760 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002761 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 if (!key)
2763 return NULL;
2764
2765 value = PyDict_GetItem(self->names, key);
2766
2767 if (value) {
2768 Py_INCREF(value);
2769 } else {
2770 /* new name. convert to universal name, and decode as
2771 necessary */
2772
2773 PyObject* tag;
2774 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002775 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776
2777 /* look for namespace separator */
2778 for (i = 0; i < size; i++)
2779 if (string[i] == '}')
2780 break;
2781 if (i != size) {
2782 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002783 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002784 if (tag == NULL) {
2785 Py_DECREF(key);
2786 return NULL;
2787 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 p[0] = '{';
2790 memcpy(p+1, string, size);
2791 size++;
2792 } else {
2793 /* plain name; use key as tag */
2794 Py_INCREF(key);
2795 tag = key;
2796 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002799 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002800 value = PyUnicode_DecodeUTF8(p, size, "strict");
2801 Py_DECREF(tag);
2802 if (!value) {
2803 Py_DECREF(key);
2804 return NULL;
2805 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806
2807 /* add to names dictionary */
2808 if (PyDict_SetItem(self->names, key, value) < 0) {
2809 Py_DECREF(key);
2810 Py_DECREF(value);
2811 return NULL;
2812 }
2813 }
2814
2815 Py_DECREF(key);
2816 return value;
2817}
2818
Eli Bendersky5b77d812012-03-16 08:20:05 +02002819/* Set the ParseError exception with the given parameters.
2820 * If message is not NULL, it's used as the error string. Otherwise, the
2821 * message string is the default for the given error_code.
2822*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002824expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2825 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002827 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002828 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002830 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002831 message ? message : EXPAT(ErrorString)(error_code),
2832 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002833 if (errmsg == NULL)
2834 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002835
Eli Bendersky532d03e2013-08-10 08:00:39 -07002836 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002837 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002838 if (!error)
2839 return;
2840
Eli Bendersky5b77d812012-03-16 08:20:05 +02002841 /* Add code and position attributes */
2842 code = PyLong_FromLong((long)error_code);
2843 if (!code) {
2844 Py_DECREF(error);
2845 return;
2846 }
2847 if (PyObject_SetAttrString(error, "code", code) == -1) {
2848 Py_DECREF(error);
2849 Py_DECREF(code);
2850 return;
2851 }
2852 Py_DECREF(code);
2853
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002854 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855 if (!position) {
2856 Py_DECREF(error);
2857 return;
2858 }
2859 if (PyObject_SetAttrString(error, "position", position) == -1) {
2860 Py_DECREF(error);
2861 Py_DECREF(position);
2862 return;
2863 }
2864 Py_DECREF(position);
2865
Eli Bendersky532d03e2013-08-10 08:00:39 -07002866 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002867 Py_DECREF(error);
2868}
2869
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002870/* -------------------------------------------------------------------- */
2871/* handlers */
2872
2873static void
2874expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2875 int data_len)
2876{
2877 PyObject* key;
2878 PyObject* value;
2879 PyObject* res;
2880
2881 if (data_len < 2 || data_in[0] != '&')
2882 return;
2883
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002884 if (PyErr_Occurred())
2885 return;
2886
Neal Norwitz0269b912007-08-08 06:56:02 +00002887 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888 if (!key)
2889 return;
2890
2891 value = PyDict_GetItem(self->entity, key);
2892
2893 if (value) {
2894 if (TreeBuilder_CheckExact(self->target))
2895 res = treebuilder_handle_data(
2896 (TreeBuilderObject*) self->target, value
2897 );
2898 else if (self->handle_data)
2899 res = PyObject_CallFunction(self->handle_data, "O", value);
2900 else
2901 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 } else if (!PyErr_Occurred()) {
2904 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002905 char message[128] = "undefined entity ";
2906 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002908 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002910 EXPAT(GetErrorColumnNumber)(self->parser),
2911 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 );
2913 }
2914
2915 Py_DECREF(key);
2916}
2917
2918static void
2919expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2920 const XML_Char **attrib_in)
2921{
2922 PyObject* res;
2923 PyObject* tag;
2924 PyObject* attrib;
2925 int ok;
2926
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002927 if (PyErr_Occurred())
2928 return;
2929
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 /* tag name */
2931 tag = makeuniversal(self, tag_in);
2932 if (!tag)
2933 return; /* parser will look for errors */
2934
2935 /* attributes */
2936 if (attrib_in[0]) {
2937 attrib = PyDict_New();
2938 if (!attrib)
2939 return;
2940 while (attrib_in[0] && attrib_in[1]) {
2941 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002942 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 if (!key || !value) {
2944 Py_XDECREF(value);
2945 Py_XDECREF(key);
2946 Py_DECREF(attrib);
2947 return;
2948 }
2949 ok = PyDict_SetItem(attrib, key, value);
2950 Py_DECREF(value);
2951 Py_DECREF(key);
2952 if (ok < 0) {
2953 Py_DECREF(attrib);
2954 return;
2955 }
2956 attrib_in += 2;
2957 }
2958 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002959 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002960 attrib = PyDict_New();
2961 if (!attrib)
2962 return;
2963 }
2964
2965 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 /* shortcut */
2967 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2968 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002969 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002970 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 res = NULL;
2974
2975 Py_DECREF(tag);
2976 Py_DECREF(attrib);
2977
2978 Py_XDECREF(res);
2979}
2980
2981static void
2982expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2983 int data_len)
2984{
2985 PyObject* data;
2986 PyObject* res;
2987
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002988 if (PyErr_Occurred())
2989 return;
2990
Neal Norwitz0269b912007-08-08 06:56:02 +00002991 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002992 if (!data)
2993 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994
2995 if (TreeBuilder_CheckExact(self->target))
2996 /* shortcut */
2997 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2998 else if (self->handle_data)
2999 res = PyObject_CallFunction(self->handle_data, "O", data);
3000 else
3001 res = NULL;
3002
3003 Py_DECREF(data);
3004
3005 Py_XDECREF(res);
3006}
3007
3008static void
3009expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3010{
3011 PyObject* tag;
3012 PyObject* res = NULL;
3013
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003014 if (PyErr_Occurred())
3015 return;
3016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 if (TreeBuilder_CheckExact(self->target))
3018 /* shortcut */
3019 /* the standard tree builder doesn't look at the end tag */
3020 res = treebuilder_handle_end(
3021 (TreeBuilderObject*) self->target, Py_None
3022 );
3023 else if (self->handle_end) {
3024 tag = makeuniversal(self, tag_in);
3025 if (tag) {
3026 res = PyObject_CallFunction(self->handle_end, "O", tag);
3027 Py_DECREF(tag);
3028 }
3029 }
3030
3031 Py_XDECREF(res);
3032}
3033
3034static void
3035expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3036 const XML_Char *uri)
3037{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003038 PyObject* sprefix = NULL;
3039 PyObject* suri = NULL;
3040
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003041 if (PyErr_Occurred())
3042 return;
3043
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003044 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003045 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003046 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003047 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048 if (!suri)
3049 return;
3050
3051 if (prefix)
3052 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3053 else
3054 sprefix = PyUnicode_FromString("");
3055 if (!sprefix) {
3056 Py_DECREF(suri);
3057 return;
3058 }
3059
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003061 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003063
3064 Py_DECREF(sprefix);
3065 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066}
3067
3068static void
3069expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3070{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003071 if (PyErr_Occurred())
3072 return;
3073
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074 treebuilder_handle_namespace(
3075 (TreeBuilderObject*) self->target, 0, NULL, NULL
3076 );
3077}
3078
3079static void
3080expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3081{
3082 PyObject* comment;
3083 PyObject* res;
3084
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003085 if (PyErr_Occurred())
3086 return;
3087
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003088 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003089 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090 if (comment) {
3091 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3092 Py_XDECREF(res);
3093 Py_DECREF(comment);
3094 }
3095 }
3096}
3097
Eli Bendersky45839902013-01-13 05:14:47 -08003098static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003099expat_start_doctype_handler(XMLParserObject *self,
3100 const XML_Char *doctype_name,
3101 const XML_Char *sysid,
3102 const XML_Char *pubid,
3103 int has_internal_subset)
3104{
3105 PyObject *self_pyobj = (PyObject *)self;
3106 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3107 PyObject *parser_doctype = NULL;
3108 PyObject *res = NULL;
3109
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003110 if (PyErr_Occurred())
3111 return;
3112
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003113 doctype_name_obj = makeuniversal(self, doctype_name);
3114 if (!doctype_name_obj)
3115 return;
3116
3117 if (sysid) {
3118 sysid_obj = makeuniversal(self, sysid);
3119 if (!sysid_obj) {
3120 Py_DECREF(doctype_name_obj);
3121 return;
3122 }
3123 } else {
3124 Py_INCREF(Py_None);
3125 sysid_obj = Py_None;
3126 }
3127
3128 if (pubid) {
3129 pubid_obj = makeuniversal(self, pubid);
3130 if (!pubid_obj) {
3131 Py_DECREF(doctype_name_obj);
3132 Py_DECREF(sysid_obj);
3133 return;
3134 }
3135 } else {
3136 Py_INCREF(Py_None);
3137 pubid_obj = Py_None;
3138 }
3139
3140 /* If the target has a handler for doctype, call it. */
3141 if (self->handle_doctype) {
3142 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3143 doctype_name_obj, pubid_obj, sysid_obj);
3144 Py_CLEAR(res);
3145 }
3146
3147 /* Now see if the parser itself has a doctype method. If yes and it's
3148 * a subclass, call it but warn about deprecation. If it's not a subclass
3149 * (i.e. vanilla XMLParser), do nothing.
3150 */
3151 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3152 if (parser_doctype) {
3153 if (!XMLParser_CheckExact(self_pyobj)) {
3154 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3155 "This method of XMLParser is deprecated. Define"
3156 " doctype() method on the TreeBuilder target.",
3157 1) < 0) {
3158 goto clear;
3159 }
3160 res = PyObject_CallFunction(parser_doctype, "OOO",
3161 doctype_name_obj, pubid_obj, sysid_obj);
3162 Py_CLEAR(res);
3163 }
3164 }
3165
3166clear:
3167 Py_XDECREF(parser_doctype);
3168 Py_DECREF(doctype_name_obj);
3169 Py_DECREF(pubid_obj);
3170 Py_DECREF(sysid_obj);
3171}
3172
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003173static void
3174expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3175 const XML_Char* data_in)
3176{
3177 PyObject* target;
3178 PyObject* data;
3179 PyObject* res;
3180
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003181 if (PyErr_Occurred())
3182 return;
3183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003185 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3186 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187 if (target && data) {
3188 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3189 Py_XDECREF(res);
3190 Py_DECREF(data);
3191 Py_DECREF(target);
3192 } else {
3193 Py_XDECREF(data);
3194 Py_XDECREF(target);
3195 }
3196 }
3197}
3198
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200
Eli Bendersky52467b12012-06-01 07:13:08 +03003201static PyObject *
3202xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003203{
Eli Bendersky52467b12012-06-01 07:13:08 +03003204 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3205 if (self) {
3206 self->parser = NULL;
3207 self->target = self->entity = self->names = NULL;
3208 self->handle_start = self->handle_data = self->handle_end = NULL;
3209 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003210 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003212 return (PyObject *)self;
3213}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214
Eli Bendersky52467b12012-06-01 07:13:08 +03003215static int
3216xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3217{
3218 XMLParserObject *self_xp = (XMLParserObject *)self;
3219 PyObject *target = NULL, *html = NULL;
3220 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003221 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222
Eli Bendersky52467b12012-06-01 07:13:08 +03003223 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3224 &html, &target, &encoding)) {
3225 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003227
Eli Bendersky52467b12012-06-01 07:13:08 +03003228 self_xp->entity = PyDict_New();
3229 if (!self_xp->entity)
3230 return -1;
3231
3232 self_xp->names = PyDict_New();
3233 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003234 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 }
3237
Eli Bendersky52467b12012-06-01 07:13:08 +03003238 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3239 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003240 Py_CLEAR(self_xp->entity);
3241 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003243 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 }
3245
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 if (target) {
3247 Py_INCREF(target);
3248 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003249 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003251 Py_CLEAR(self_xp->entity);
3252 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 EXPAT(ParserFree)(self_xp->parser);
3254 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 }
3257 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3260 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3261 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3262 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3263 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3264 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003265 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266
3267 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003268
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003272 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 (XML_StartElementHandler) expat_start_handler,
3274 (XML_EndElementHandler) expat_end_handler
3275 );
3276 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003277 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 (XML_DefaultHandler) expat_default_handler
3279 );
3280 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 (XML_CharacterDataHandler) expat_data_handler
3283 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003284 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003286 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 (XML_CommentHandler) expat_comment_handler
3288 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003291 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 (XML_ProcessingInstructionHandler) expat_pi_handler
3293 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003294 EXPAT(SetStartDoctypeDeclHandler)(
3295 self_xp->parser,
3296 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3297 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003300 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302
Eli Bendersky52467b12012-06-01 07:13:08 +03003303 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304}
3305
Eli Bendersky52467b12012-06-01 07:13:08 +03003306static int
3307xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3308{
3309 Py_VISIT(self->handle_close);
3310 Py_VISIT(self->handle_pi);
3311 Py_VISIT(self->handle_comment);
3312 Py_VISIT(self->handle_end);
3313 Py_VISIT(self->handle_data);
3314 Py_VISIT(self->handle_start);
3315
3316 Py_VISIT(self->target);
3317 Py_VISIT(self->entity);
3318 Py_VISIT(self->names);
3319
3320 return 0;
3321}
3322
3323static int
3324xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325{
3326 EXPAT(ParserFree)(self->parser);
3327
Antoine Pitrouc1948842012-10-01 23:40:37 +02003328 Py_CLEAR(self->handle_close);
3329 Py_CLEAR(self->handle_pi);
3330 Py_CLEAR(self->handle_comment);
3331 Py_CLEAR(self->handle_end);
3332 Py_CLEAR(self->handle_data);
3333 Py_CLEAR(self->handle_start);
3334 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335
Antoine Pitrouc1948842012-10-01 23:40:37 +02003336 Py_CLEAR(self->target);
3337 Py_CLEAR(self->entity);
3338 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339
Eli Bendersky52467b12012-06-01 07:13:08 +03003340 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341}
3342
Eli Bendersky52467b12012-06-01 07:13:08 +03003343static void
3344xmlparser_dealloc(XMLParserObject* self)
3345{
3346 PyObject_GC_UnTrack(self);
3347 xmlparser_gc_clear(self);
3348 Py_TYPE(self)->tp_free((PyObject *)self);
3349}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350
3351LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003352expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353{
3354 int ok;
3355
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003356 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3358
3359 if (PyErr_Occurred())
3360 return NULL;
3361
3362 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003363 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003364 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003366 EXPAT(GetErrorColumnNumber)(self->parser),
3367 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368 );
3369 return NULL;
3370 }
3371
3372 Py_RETURN_NONE;
3373}
3374
3375static PyObject*
3376xmlparser_close(XMLParserObject* self, PyObject* args)
3377{
3378 /* end feeding data to parser */
3379
3380 PyObject* res;
3381 if (!PyArg_ParseTuple(args, ":close"))
3382 return NULL;
3383
3384 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003385 if (!res)
3386 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003388 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 Py_DECREF(res);
3390 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003391 }
3392 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003393 Py_DECREF(res);
3394 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003395 }
3396 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003397 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003398 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399}
3400
3401static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003402xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403{
3404 /* feed data to parser */
3405
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003406 if (PyUnicode_Check(arg)) {
3407 Py_ssize_t data_len;
3408 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3409 if (data == NULL)
3410 return NULL;
3411 if (data_len > INT_MAX) {
3412 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3413 return NULL;
3414 }
3415 /* Explicitly set UTF-8 encoding. Return code ignored. */
3416 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3417 return expat_parse(self, data, (int)data_len, 0);
3418 }
3419 else {
3420 Py_buffer view;
3421 PyObject *res;
3422 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3423 return NULL;
3424 if (view.len > INT_MAX) {
3425 PyBuffer_Release(&view);
3426 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3427 return NULL;
3428 }
3429 res = expat_parse(self, view.buf, (int)view.len, 0);
3430 PyBuffer_Release(&view);
3431 return res;
3432 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
3435static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003436xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437{
Eli Benderskya3699232013-05-19 18:47:23 -07003438 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439 PyObject* reader;
3440 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003441 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442 PyObject* res;
3443
3444 PyObject* fileobj;
3445 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3446 return NULL;
3447
3448 reader = PyObject_GetAttrString(fileobj, "read");
3449 if (!reader)
3450 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003451
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452 /* read from open file object */
3453 for (;;) {
3454
3455 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3456
3457 if (!buffer) {
3458 /* read failed (e.g. due to KeyboardInterrupt) */
3459 Py_DECREF(reader);
3460 return NULL;
3461 }
3462
Eli Benderskyf996e772012-03-16 05:53:30 +02003463 if (PyUnicode_CheckExact(buffer)) {
3464 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003465 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003466 Py_DECREF(buffer);
3467 break;
3468 }
3469 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003470 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003471 if (!temp) {
3472 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003473 Py_DECREF(reader);
3474 return NULL;
3475 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003476 buffer = temp;
3477 }
3478 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 Py_DECREF(buffer);
3480 break;
3481 }
3482
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003483 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3484 Py_DECREF(buffer);
3485 Py_DECREF(reader);
3486 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3487 return NULL;
3488 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003490 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491 );
3492
3493 Py_DECREF(buffer);
3494
3495 if (!res) {
3496 Py_DECREF(reader);
3497 return NULL;
3498 }
3499 Py_DECREF(res);
3500
3501 }
3502
3503 Py_DECREF(reader);
3504
3505 res = expat_parse(self, "", 0, 1);
3506
3507 if (res && TreeBuilder_CheckExact(self->target)) {
3508 Py_DECREF(res);
3509 return treebuilder_done((TreeBuilderObject*) self->target);
3510 }
3511
3512 return res;
3513}
3514
3515static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003516xmlparser_doctype(XMLParserObject *self, PyObject *args)
3517{
3518 Py_RETURN_NONE;
3519}
3520
3521static PyObject*
3522xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523{
3524 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003525 Py_ssize_t i, seqlen;
3526 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003528 PyObject *events_queue;
3529 PyObject *events_to_report = Py_None;
3530 PyObject *events_seq;
3531 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3532 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003533 return NULL;
3534
3535 if (!TreeBuilder_CheckExact(self->target)) {
3536 PyErr_SetString(
3537 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003538 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003539 "targets"
3540 );
3541 return NULL;
3542 }
3543
3544 target = (TreeBuilderObject*) self->target;
3545
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003546 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003547 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003548 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003549
3550 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003551 Py_CLEAR(target->start_event_obj);
3552 Py_CLEAR(target->end_event_obj);
3553 Py_CLEAR(target->start_ns_event_obj);
3554 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003556 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003558 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003559 Py_RETURN_NONE;
3560 }
3561
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003562 if (!(events_seq = PySequence_Fast(events_to_report,
3563 "events must be a sequence"))) {
3564 return NULL;
3565 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003566
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003567 seqlen = PySequence_Size(events_seq);
3568 for (i = 0; i < seqlen; ++i) {
3569 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3570 char *event_name = NULL;
3571 if (PyUnicode_Check(event_name_obj)) {
3572 event_name = _PyUnicode_AsString(event_name_obj);
3573 } else if (PyBytes_Check(event_name_obj)) {
3574 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003575 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003576
3577 if (event_name == NULL) {
3578 Py_DECREF(events_seq);
3579 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3580 return NULL;
3581 } else if (strcmp(event_name, "start") == 0) {
3582 Py_INCREF(event_name_obj);
3583 target->start_event_obj = event_name_obj;
3584 } else if (strcmp(event_name, "end") == 0) {
3585 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003586 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003587 target->end_event_obj = event_name_obj;
3588 } else if (strcmp(event_name, "start-ns") == 0) {
3589 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003590 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003591 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 EXPAT(SetNamespaceDeclHandler)(
3593 self->parser,
3594 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3595 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3596 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003597 } else if (strcmp(event_name, "end-ns") == 0) {
3598 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003600 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003601 EXPAT(SetNamespaceDeclHandler)(
3602 self->parser,
3603 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3604 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3605 );
3606 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003607 Py_DECREF(events_seq);
3608 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 return NULL;
3610 }
3611 }
3612
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003613 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615}
3616
3617static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003618 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003620 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003622 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003623 {NULL, NULL}
3624};
3625
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003626static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003627xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003628{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003629 if (PyUnicode_Check(nameobj)) {
3630 PyObject* res;
3631 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3632 res = self->entity;
3633 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3634 res = self->target;
3635 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3636 return PyUnicode_FromFormat(
3637 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003639 }
3640 else
3641 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642
Alexander Belopolskye239d232010-12-08 23:31:48 +00003643 Py_INCREF(res);
3644 return res;
3645 }
3646 generic:
3647 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648}
3649
Neal Norwitz227b5332006-03-22 09:28:35 +00003650static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003651 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003652 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003654 (destructor)xmlparser_dealloc, /* tp_dealloc */
3655 0, /* tp_print */
3656 0, /* tp_getattr */
3657 0, /* tp_setattr */
3658 0, /* tp_reserved */
3659 0, /* tp_repr */
3660 0, /* tp_as_number */
3661 0, /* tp_as_sequence */
3662 0, /* tp_as_mapping */
3663 0, /* tp_hash */
3664 0, /* tp_call */
3665 0, /* tp_str */
3666 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3667 0, /* tp_setattro */
3668 0, /* tp_as_buffer */
3669 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3670 /* tp_flags */
3671 0, /* tp_doc */
3672 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3673 (inquiry)xmlparser_gc_clear, /* tp_clear */
3674 0, /* tp_richcompare */
3675 0, /* tp_weaklistoffset */
3676 0, /* tp_iter */
3677 0, /* tp_iternext */
3678 xmlparser_methods, /* tp_methods */
3679 0, /* tp_members */
3680 0, /* tp_getset */
3681 0, /* tp_base */
3682 0, /* tp_dict */
3683 0, /* tp_descr_get */
3684 0, /* tp_descr_set */
3685 0, /* tp_dictoffset */
3686 (initproc)xmlparser_init, /* tp_init */
3687 PyType_GenericAlloc, /* tp_alloc */
3688 xmlparser_new, /* tp_new */
3689 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003690};
3691
/* ==================================================================== */
/* python module interface */
3694
3695static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003696 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003697 {NULL, NULL}
3698};
3699
Martin v. Löwis1a214512008-06-11 05:26:20 +00003700
Eli Bendersky532d03e2013-08-10 08:00:39 -07003701static struct PyModuleDef elementtreemodule = {
3702 PyModuleDef_HEAD_INIT,
3703 "_elementtree",
3704 NULL,
3705 sizeof(elementtreestate),
3706 _functions,
3707 NULL,
3708 elementtree_traverse,
3709 elementtree_clear,
3710 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003711};
3712
Neal Norwitzf6657e62006-12-28 04:47:50 +00003713PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003714PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003715{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003716 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003717 elementtreestate *st;
3718
3719 m = PyState_FindModule(&elementtreemodule);
3720 if (m) {
3721 Py_INCREF(m);
3722 return m;
3723 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003725 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003726 if (PyType_Ready(&ElementIter_Type) < 0)
3727 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003728 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003729 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003730 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003731 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003732 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003733 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734
Eli Bendersky532d03e2013-08-10 08:00:39 -07003735 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003736 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003737 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003738 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003739
Eli Bendersky828efde2012-04-05 05:40:58 +03003740 if (!(temp = PyImport_ImportModule("copy")))
3741 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003742 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003743 Py_XDECREF(temp);
3744
Eli Bendersky532d03e2013-08-10 08:00:39 -07003745 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003746 return NULL;
3747
Eli Bendersky20d41742012-06-01 09:48:37 +03003748 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003749 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3750 if (expat_capi) {
3751 /* check that it's usable */
3752 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003753 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003754 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3755 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003756 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003757 PyErr_SetString(PyExc_ImportError,
3758 "pyexpat version is incompatible");
3759 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003760 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003761 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003762 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003763 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003764
Eli Bendersky532d03e2013-08-10 08:00:39 -07003765 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003766 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003767 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003768 Py_INCREF(st->parseerror_obj);
3769 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003770
Eli Bendersky092af1f2012-03-04 07:14:03 +02003771 Py_INCREF((PyObject *)&Element_Type);
3772 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3773
Eli Bendersky58d548d2012-05-29 15:45:16 +03003774 Py_INCREF((PyObject *)&TreeBuilder_Type);
3775 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3776
Eli Bendersky52467b12012-06-01 07:13:08 +03003777 Py_INCREF((PyObject *)&XMLParser_Type);
3778 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003779
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003780 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003781}