blob: f2f370fe22e4e1400f4f234c023bcc9fadbf154d [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Florent Xiclunaf15351d2010-03-13 23:24:31 +000061/* Define to do all expat calls via pyexpat's embedded expat library */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000062/* #define USE_PYEXPAT_CAPI */
63
64/* An element can hold this many children without extra memory
65 allocations. */
66#define STATIC_CHILDREN 4
67
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
72
73/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010074 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000075 that the number of children should be an even number, at least on
76 32-bit platforms. */
77
78/* -------------------------------------------------------------------- */
79
/* Optional allocation tracing.  Change the "#if 0" below to "#if 1" to keep
   a running byte total in `memory` and print it, together with the given
   comment, on every ALLOC/RELEASE.  In the default configuration both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
90
/* compiler tweaks: LOCAL(type) introduces a file-private helper returning
   `type`.  Under MSVC the helper is additionally declared __inline and
   __fastcall; everywhere else it is a plain static function. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
97
/* Tagged-pointer helpers.  The lowest bit of the text and tail pointers is
   used as a 'join' flag, so every use of those fields as an object pointer
   must go through JOIN_OBJ.  See the comments in the ElementObject
   definition for what the flag means.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
105
106/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000107static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000108static PyObject* elementtree_deepcopy_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000109static PyObject* elementtree_iter_obj;
110static PyObject* elementtree_itertext_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000111static PyObject* elementpath_obj;
112
113/* helpers */
114
115LOCAL(PyObject*)
116deepcopy(PyObject* object, PyObject* memo)
117{
118 /* do a deep copy of the given object */
119
120 PyObject* args;
121 PyObject* result;
122
123 if (!elementtree_deepcopy_obj) {
124 PyErr_SetString(
125 PyExc_RuntimeError,
126 "deepcopy helper not found"
127 );
128 return NULL;
129 }
130
131 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000132 if (!args)
133 return NULL;
134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
136 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
137
138 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
139
140 Py_DECREF(args);
141
142 return result;
143}
144
145LOCAL(PyObject*)
146list_join(PyObject* list)
147{
148 /* join list elements (destroying the list in the process) */
149
150 PyObject* joiner;
151 PyObject* function;
152 PyObject* args;
153 PyObject* result;
154
155 switch (PyList_GET_SIZE(list)) {
156 case 0:
157 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000158 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 case 1:
160 result = PyList_GET_ITEM(list, 0);
161 Py_INCREF(result);
162 Py_DECREF(list);
163 return result;
164 }
165
166 /* two or more elements: slice out a suitable separator from the
167 first member, and use that to join the entire list */
168
169 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
170 if (!joiner)
171 return NULL;
172
173 function = PyObject_GetAttrString(joiner, "join");
174 if (!function) {
175 Py_DECREF(joiner);
176 return NULL;
177 }
178
179 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000180 if (!args)
181 return NULL;
182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183 PyTuple_SET_ITEM(args, 0, list);
184
185 result = PyObject_CallObject(function, args);
186
187 Py_DECREF(args); /* also removes list */
188 Py_DECREF(function);
189 Py_DECREF(joiner);
190
191 return result;
192}
193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000194/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200195/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196
197typedef struct {
198
199 /* attributes (a dictionary object), or None if no attributes */
200 PyObject* attrib;
201
202 /* child elements */
203 int length; /* actual number of items */
204 int allocated; /* allocated items */
205
206 /* this either points to _children or to a malloced buffer */
207 PyObject* *children;
208
209 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211} ElementObjectExtra;
212
213typedef struct {
214 PyObject_HEAD
215
216 /* element tag (a string). */
217 PyObject* tag;
218
219 /* text before first child. note that this is a tagged pointer;
220 use JOIN_OBJ to get the object pointer. the join flag is used
221 to distinguish lists created by the tree builder from lists
222 assigned to the attribute by application code; the former
223 should be joined before being returned to the user, the latter
224 should be left intact. */
225 PyObject* text;
226
227 /* text after this element, in parent. note that this is a tagged
228 pointer; use JOIN_OBJ to get the object pointer. */
229 PyObject* tail;
230
231 ElementObjectExtra* extra;
232
Eli Benderskyebf37a22012-04-03 22:02:37 +0300233 PyObject *weakreflist; /* For tp_weaklistoffset */
234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235} ElementObject;
236
Neal Norwitz227b5332006-03-22 09:28:35 +0000237static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
Christian Heimes90aa7642007-12-19 02:45:37 +0000239#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000240
241/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200242/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000243
244LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200245create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246{
247 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
248 if (!self->extra)
249 return -1;
250
251 if (!attrib)
252 attrib = Py_None;
253
254 Py_INCREF(attrib);
255 self->extra->attrib = attrib;
256
257 self->extra->length = 0;
258 self->extra->allocated = STATIC_CHILDREN;
259 self->extra->children = self->extra->_children;
260
261 return 0;
262}
263
264LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200265dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266{
Eli Bendersky08b85292012-04-04 15:55:07 +0300267 ElementObjectExtra *myextra;
268 int i;
269
Eli Benderskyebf37a22012-04-03 22:02:37 +0300270 if (!self->extra)
271 return;
272
273 /* Avoid DECREFs calling into this code again (cycles, etc.)
274 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300275 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300276 self->extra = NULL;
277
278 Py_DECREF(myextra->attrib);
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 for (i = 0; i < myextra->length; i++)
281 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282
Eli Benderskyebf37a22012-04-03 22:02:37 +0300283 if (myextra->children != myextra->_children)
284 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285
Eli Benderskyebf37a22012-04-03 22:02:37 +0300286 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287}
288
Eli Bendersky092af1f2012-03-04 07:14:03 +0200289/* Convenience internal function to create new Element objects with the given
290 * tag and attributes.
291*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294{
295 ElementObject* self;
296
Eli Bendersky0192ba32012-03-30 16:38:33 +0300297 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298 if (self == NULL)
299 return NULL;
300
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
304
305 self->extra = NULL;
306
307 if (attrib != Py_None) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200308 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000309 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000311 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000312 }
313
314 Py_INCREF(tag);
315 self->tag = tag;
316
317 Py_INCREF(Py_None);
318 self->text = Py_None;
319
320 Py_INCREF(Py_None);
321 self->tail = Py_None;
322
Eli Benderskyebf37a22012-04-03 22:02:37 +0300323 self->weakreflist = NULL;
324
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000325 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300326 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000327 return (PyObject*) self;
328}
329
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330static PyObject *
331element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
332{
333 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
334 if (e != NULL) {
335 Py_INCREF(Py_None);
336 e->tag = Py_None;
337
338 Py_INCREF(Py_None);
339 e->text = Py_None;
340
341 Py_INCREF(Py_None);
342 e->tail = Py_None;
343
344 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300345 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200346 }
347 return (PyObject *)e;
348}
349
Eli Bendersky737b1732012-05-29 06:02:56 +0300350/* Helper function for extracting the attrib dictionary from a keywords dict.
351 * This is required by some constructors/functions in this module that can
352 * either accept attrib as a keyword argument or all attributes splashed
353 * directly into *kwds.
354 * If there is no 'attrib' keyword, return an empty dict.
355 */
356static PyObject*
357get_attrib_from_keywords(PyObject *kwds)
358{
359 PyObject *attrib_str = PyUnicode_FromString("attrib");
360 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
361
362 if (attrib) {
363 /* If attrib was found in kwds, copy its value and remove it from
364 * kwds
365 */
366 if (!PyDict_Check(attrib)) {
367 Py_DECREF(attrib_str);
368 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
369 Py_TYPE(attrib)->tp_name);
370 return NULL;
371 }
372 attrib = PyDict_Copy(attrib);
373 PyDict_DelItem(kwds, attrib_str);
374 } else {
375 attrib = PyDict_New();
376 }
377
378 Py_DECREF(attrib_str);
379
380 if (attrib)
381 PyDict_Update(attrib, kwds);
382 return attrib;
383}
384
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385static int
386element_init(PyObject *self, PyObject *args, PyObject *kwds)
387{
388 PyObject *tag;
389 PyObject *tmp;
390 PyObject *attrib = NULL;
391 ElementObject *self_elem;
392
393 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
394 return -1;
395
Eli Bendersky737b1732012-05-29 06:02:56 +0300396 if (attrib) {
397 /* attrib passed as positional arg */
398 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 if (!attrib)
400 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300401 if (kwds) {
402 if (PyDict_Update(attrib, kwds) < 0) {
403 return -1;
404 }
405 }
406 } else if (kwds) {
407 /* have keywords args */
408 attrib = get_attrib_from_keywords(kwds);
409 if (!attrib)
410 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300412 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 Py_INCREF(Py_None);
414 attrib = Py_None;
415 }
416
417 self_elem = (ElementObject *)self;
418
419 /* Use None for empty dictionaries */
420 if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
421 Py_INCREF(Py_None);
422 attrib = Py_None;
423 }
424
425 if (attrib != Py_None) {
426 if (create_extra(self_elem, attrib) < 0) {
427 PyObject_Del(self_elem);
428 return -1;
429 }
430 }
431
432 /* If create_extra needed attrib, it took a reference to it, so we can
433 * release ours anyway.
434 */
435 Py_DECREF(attrib);
436
437 /* Replace the objects already pointed to by tag, text and tail. */
438 tmp = self_elem->tag;
439 self_elem->tag = tag;
440 Py_INCREF(tag);
441 Py_DECREF(tmp);
442
443 tmp = self_elem->text;
444 self_elem->text = Py_None;
445 Py_INCREF(Py_None);
446 Py_DECREF(JOIN_OBJ(tmp));
447
448 tmp = self_elem->tail;
449 self_elem->tail = Py_None;
450 Py_INCREF(Py_None);
451 Py_DECREF(JOIN_OBJ(tmp));
452
453 return 0;
454}
455
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456LOCAL(int)
457element_resize(ElementObject* self, int extra)
458{
459 int size;
460 PyObject* *children;
461
462 /* make sure self->children can hold the given number of extra
463 elements. set an exception and return -1 if allocation failed */
464
465 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200466 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000467
468 size = self->extra->length + extra;
469
470 if (size > self->extra->allocated) {
471 /* use Python 2.4's list growth strategy */
472 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100474 * which needs at least 4 bytes.
475 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000476 * be safe.
477 */
478 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000479 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000480 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100481 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000482 * false alarm always assume at least one child to be safe.
483 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000484 children = PyObject_Realloc(self->extra->children,
485 size * sizeof(PyObject*));
486 if (!children)
487 goto nomemory;
488 } else {
489 children = PyObject_Malloc(size * sizeof(PyObject*));
490 if (!children)
491 goto nomemory;
492 /* copy existing children from static area to malloc buffer */
493 memcpy(children, self->extra->children,
494 self->extra->length * sizeof(PyObject*));
495 }
496 self->extra->children = children;
497 self->extra->allocated = size;
498 }
499
500 return 0;
501
502 nomemory:
503 PyErr_NoMemory();
504 return -1;
505}
506
507LOCAL(int)
508element_add_subelement(ElementObject* self, PyObject* element)
509{
510 /* add a child element to a parent */
511
512 if (element_resize(self, 1) < 0)
513 return -1;
514
515 Py_INCREF(element);
516 self->extra->children[self->extra->length] = element;
517
518 self->extra->length++;
519
520 return 0;
521}
522
523LOCAL(PyObject*)
524element_get_attrib(ElementObject* self)
525{
526 /* return borrowed reference to attrib dictionary */
527 /* note: this function assumes that the extra section exists */
528
529 PyObject* res = self->extra->attrib;
530
531 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000532 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000533 /* create missing dictionary */
534 res = PyDict_New();
535 if (!res)
536 return NULL;
537 self->extra->attrib = res;
538 }
539
540 return res;
541}
542
543LOCAL(PyObject*)
544element_get_text(ElementObject* self)
545{
546 /* return borrowed reference to text attribute */
547
548 PyObject* res = self->text;
549
550 if (JOIN_GET(res)) {
551 res = JOIN_OBJ(res);
552 if (PyList_CheckExact(res)) {
553 res = list_join(res);
554 if (!res)
555 return NULL;
556 self->text = res;
557 }
558 }
559
560 return res;
561}
562
563LOCAL(PyObject*)
564element_get_tail(ElementObject* self)
565{
566 /* return borrowed reference to text attribute */
567
568 PyObject* res = self->tail;
569
570 if (JOIN_GET(res)) {
571 res = JOIN_OBJ(res);
572 if (PyList_CheckExact(res)) {
573 res = list_join(res);
574 if (!res)
575 return NULL;
576 self->tail = res;
577 }
578 }
579
580 return res;
581}
582
583static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300584subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585{
586 PyObject* elem;
587
588 ElementObject* parent;
589 PyObject* tag;
590 PyObject* attrib = NULL;
591 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
592 &Element_Type, &parent, &tag,
593 &PyDict_Type, &attrib))
594 return NULL;
595
Eli Bendersky737b1732012-05-29 06:02:56 +0300596 if (attrib) {
597 /* attrib passed as positional arg */
598 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 if (!attrib)
600 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300601 if (kwds) {
602 if (PyDict_Update(attrib, kwds) < 0) {
603 return NULL;
604 }
605 }
606 } else if (kwds) {
607 /* have keyword args */
608 attrib = get_attrib_from_keywords(kwds);
609 if (!attrib)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300612 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000613 Py_INCREF(Py_None);
614 attrib = Py_None;
615 }
616
Eli Bendersky092af1f2012-03-04 07:14:03 +0200617 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618
619 Py_DECREF(attrib);
620
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 if (element_add_subelement(parent, elem) < 0) {
622 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 return elem;
627}
628
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629static int
630element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
631{
632 Py_VISIT(self->tag);
633 Py_VISIT(JOIN_OBJ(self->text));
634 Py_VISIT(JOIN_OBJ(self->tail));
635
636 if (self->extra) {
637 int i;
638 Py_VISIT(self->extra->attrib);
639
640 for (i = 0; i < self->extra->length; ++i)
641 Py_VISIT(self->extra->children[i]);
642 }
643 return 0;
644}
645
646static int
647element_gc_clear(ElementObject *self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300650
651 /* The following is like Py_CLEAR for self->text and self->tail, but
652 * written explicitily because the real pointers hide behind access
653 * macros.
654 */
655 if (self->text) {
656 PyObject *tmp = JOIN_OBJ(self->text);
657 self->text = NULL;
658 Py_DECREF(tmp);
659 }
660
661 if (self->tail) {
662 PyObject *tmp = JOIN_OBJ(self->tail);
663 self->tail = NULL;
664 Py_DECREF(tmp);
665 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300666
667 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300668 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300669 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 return 0;
672}
673
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674static void
675element_dealloc(ElementObject* self)
676{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300678
679 if (self->weakreflist != NULL)
680 PyObject_ClearWeakRefs((PyObject *) self);
681
Eli Bendersky0192ba32012-03-30 16:38:33 +0300682 /* element_gc_clear clears all references and deallocates extra
683 */
684 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685
686 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200687 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000688}
689
690/* -------------------------------------------------------------------- */
691/* methods (in alphabetical order) */
692
693static PyObject*
694element_append(ElementObject* self, PyObject* args)
695{
696 PyObject* element;
697 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
698 return NULL;
699
700 if (element_add_subelement(self, element) < 0)
701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
706static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300707element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708{
709 if (!PyArg_ParseTuple(args, ":clear"))
710 return NULL;
711
Eli Benderskyebf37a22012-04-03 22:02:37 +0300712 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_INCREF(Py_None);
715 Py_DECREF(JOIN_OBJ(self->text));
716 self->text = Py_None;
717
718 Py_INCREF(Py_None);
719 Py_DECREF(JOIN_OBJ(self->tail));
720 self->tail = Py_None;
721
722 Py_RETURN_NONE;
723}
724
725static PyObject*
726element_copy(ElementObject* self, PyObject* args)
727{
728 int i;
729 ElementObject* element;
730
731 if (!PyArg_ParseTuple(args, ":__copy__"))
732 return NULL;
733
Eli Bendersky092af1f2012-03-04 07:14:03 +0200734 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735 self->tag, (self->extra) ? self->extra->attrib : Py_None
736 );
737 if (!element)
738 return NULL;
739
740 Py_DECREF(JOIN_OBJ(element->text));
741 element->text = self->text;
742 Py_INCREF(JOIN_OBJ(element->text));
743
744 Py_DECREF(JOIN_OBJ(element->tail));
745 element->tail = self->tail;
746 Py_INCREF(JOIN_OBJ(element->tail));
747
748 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100749
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000750 if (element_resize(element, self->extra->length) < 0) {
751 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000753 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000754
755 for (i = 0; i < self->extra->length; i++) {
756 Py_INCREF(self->extra->children[i]);
757 element->extra->children[i] = self->extra->children[i];
758 }
759
760 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100761
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 }
763
764 return (PyObject*) element;
765}
766
767static PyObject*
768element_deepcopy(ElementObject* self, PyObject* args)
769{
770 int i;
771 ElementObject* element;
772 PyObject* tag;
773 PyObject* attrib;
774 PyObject* text;
775 PyObject* tail;
776 PyObject* id;
777
778 PyObject* memo;
779 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
780 return NULL;
781
782 tag = deepcopy(self->tag, memo);
783 if (!tag)
784 return NULL;
785
786 if (self->extra) {
787 attrib = deepcopy(self->extra->attrib, memo);
788 if (!attrib) {
789 Py_DECREF(tag);
790 return NULL;
791 }
792 } else {
793 Py_INCREF(Py_None);
794 attrib = Py_None;
795 }
796
Eli Bendersky092af1f2012-03-04 07:14:03 +0200797 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 Py_DECREF(tag);
800 Py_DECREF(attrib);
801
802 if (!element)
803 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100804
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 text = deepcopy(JOIN_OBJ(self->text), memo);
806 if (!text)
807 goto error;
808 Py_DECREF(element->text);
809 element->text = JOIN_SET(text, JOIN_GET(self->text));
810
811 tail = deepcopy(JOIN_OBJ(self->tail), memo);
812 if (!tail)
813 goto error;
814 Py_DECREF(element->tail);
815 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
816
817 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100818
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 if (element_resize(element, self->extra->length) < 0)
820 goto error;
821
822 for (i = 0; i < self->extra->length; i++) {
823 PyObject* child = deepcopy(self->extra->children[i], memo);
824 if (!child) {
825 element->extra->length = i;
826 goto error;
827 }
828 element->extra->children[i] = child;
829 }
830
831 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100832
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 }
834
835 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000836 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000837 if (!id)
838 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839
840 i = PyDict_SetItem(memo, id, (PyObject*) element);
841
842 Py_DECREF(id);
843
844 if (i < 0)
845 goto error;
846
847 return (PyObject*) element;
848
849 error:
850 Py_DECREF(element);
851 return NULL;
852}
853
854LOCAL(int)
855checkpath(PyObject* tag)
856{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000857 Py_ssize_t i;
858 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000859
860 /* check if a tag contains an xpath character */
861
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000862#define PATHCHAR(ch) \
863 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000864
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
867 void *data = PyUnicode_DATA(tag);
868 unsigned int kind = PyUnicode_KIND(tag);
869 for (i = 0; i < len; i++) {
870 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
871 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000872 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200873 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200875 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 return 1;
877 }
878 return 0;
879 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000880 if (PyBytes_Check(tag)) {
881 char *p = PyBytes_AS_STRING(tag);
882 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000883 if (p[i] == '{')
884 check = 0;
885 else if (p[i] == '}')
886 check = 1;
887 else if (check && PATHCHAR(p[i]))
888 return 1;
889 }
890 return 0;
891 }
892
893 return 1; /* unknown type; might be path expression */
894}
895
896static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000897element_extend(ElementObject* self, PyObject* args)
898{
899 PyObject* seq;
900 Py_ssize_t i, seqlen = 0;
901
902 PyObject* seq_in;
903 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
904 return NULL;
905
906 seq = PySequence_Fast(seq_in, "");
907 if (!seq) {
908 PyErr_Format(
909 PyExc_TypeError,
910 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
911 );
912 return NULL;
913 }
914
915 seqlen = PySequence_Size(seq);
916 for (i = 0; i < seqlen; i++) {
917 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200918 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
919 Py_DECREF(seq);
920 PyErr_Format(
921 PyExc_TypeError,
922 "expected an Element, not \"%.200s\"",
923 Py_TYPE(element)->tp_name);
924 return NULL;
925 }
926
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000927 if (element_add_subelement(self, element) < 0) {
928 Py_DECREF(seq);
929 return NULL;
930 }
931 }
932
933 Py_DECREF(seq);
934
935 Py_RETURN_NONE;
936}
937
938static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300939element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000940{
941 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000942 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000943 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300944 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200945
Eli Bendersky737b1732012-05-29 06:02:56 +0300946 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
947 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000948 return NULL;
949
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200950 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200951 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200952 return _PyObject_CallMethodId(
953 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000954 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200955 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000956
957 if (!self->extra)
958 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100959
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960 for (i = 0; i < self->extra->length; i++) {
961 PyObject* item = self->extra->children[i];
962 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000963 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964 Py_INCREF(item);
965 return item;
966 }
967 }
968
969 Py_RETURN_NONE;
970}
971
972static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300973element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000974{
975 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000976 PyObject* tag;
977 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000978 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200979 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300980 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200981
Eli Bendersky737b1732012-05-29 06:02:56 +0300982 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
983 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000984 return NULL;
985
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000986 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200987 return _PyObject_CallMethodId(
988 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000989 );
990
991 if (!self->extra) {
992 Py_INCREF(default_value);
993 return default_value;
994 }
995
996 for (i = 0; i < self->extra->length; i++) {
997 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000998 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
999
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001000 PyObject* text = element_get_text(item);
1001 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001002 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001003 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 return text;
1005 }
1006 }
1007
1008 Py_INCREF(default_value);
1009 return default_value;
1010}
1011
1012static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001013element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001014{
1015 int i;
1016 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001018 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001019 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001020
Eli Bendersky737b1732012-05-29 06:02:56 +03001021 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1022 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001023 return NULL;
1024
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001025 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001026 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001027 return _PyObject_CallMethodId(
1028 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001031
1032 out = PyList_New(0);
1033 if (!out)
1034 return NULL;
1035
1036 if (!self->extra)
1037 return out;
1038
1039 for (i = 0; i < self->extra->length; i++) {
1040 PyObject* item = self->extra->children[i];
1041 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001042 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001043 if (PyList_Append(out, item) < 0) {
1044 Py_DECREF(out);
1045 return NULL;
1046 }
1047 }
1048 }
1049
1050 return out;
1051}
1052
1053static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001054element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001055{
1056 PyObject* tag;
1057 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001058 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001059 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001060
Eli Bendersky737b1732012-05-29 06:02:56 +03001061 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1062 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001063 return NULL;
1064
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001065 return _PyObject_CallMethodId(
1066 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001067 );
1068}
1069
1070static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071element_get(ElementObject* self, PyObject* args)
1072{
1073 PyObject* value;
1074
1075 PyObject* key;
1076 PyObject* default_value = Py_None;
1077 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1078 return NULL;
1079
1080 if (!self->extra || self->extra->attrib == Py_None)
1081 value = default_value;
1082 else {
1083 value = PyDict_GetItem(self->extra->attrib, key);
1084 if (!value)
1085 value = default_value;
1086 }
1087
1088 Py_INCREF(value);
1089 return value;
1090}
1091
1092static PyObject*
1093element_getchildren(ElementObject* self, PyObject* args)
1094{
1095 int i;
1096 PyObject* list;
1097
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098 /* FIXME: report as deprecated? */
1099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001100 if (!PyArg_ParseTuple(args, ":getchildren"))
1101 return NULL;
1102
1103 if (!self->extra)
1104 return PyList_New(0);
1105
1106 list = PyList_New(self->extra->length);
1107 if (!list)
1108 return NULL;
1109
1110 for (i = 0; i < self->extra->length; i++) {
1111 PyObject* item = self->extra->children[i];
1112 Py_INCREF(item);
1113 PyList_SET_ITEM(list, i, item);
1114 }
1115
1116 return list;
1117}
1118
1119static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120element_iter(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121{
1122 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001123
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001124 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126 return NULL;
1127
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 if (!elementtree_iter_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129 PyErr_SetString(
1130 PyExc_RuntimeError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 "iter helper not found"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 );
1133 return NULL;
1134 }
1135
1136 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001137 if (!args)
1138 return NULL;
Neal Norwitz02876df2006-02-07 06:58:52 +00001139
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001140 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1141 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
1142
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001143 result = PyObject_CallObject(elementtree_iter_obj, args);
1144
1145 Py_DECREF(args);
1146
1147 return result;
1148}
1149
1150
1151static PyObject*
1152element_itertext(ElementObject* self, PyObject* args)
1153{
1154 PyObject* result;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001155
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001156 if (!PyArg_ParseTuple(args, ":itertext"))
1157 return NULL;
1158
1159 if (!elementtree_itertext_obj) {
1160 PyErr_SetString(
1161 PyExc_RuntimeError,
1162 "itertext helper not found"
1163 );
1164 return NULL;
1165 }
1166
1167 args = PyTuple_New(1);
1168 if (!args)
1169 return NULL;
1170
1171 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
1172
1173 result = PyObject_CallObject(elementtree_itertext_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174
1175 Py_DECREF(args);
1176
1177 return result;
1178}
1179
1180static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001181element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001183 ElementObject* self = (ElementObject*) self_;
1184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 if (!self->extra || index < 0 || index >= self->extra->length) {
1186 PyErr_SetString(
1187 PyExc_IndexError,
1188 "child index out of range"
1189 );
1190 return NULL;
1191 }
1192
1193 Py_INCREF(self->extra->children[index]);
1194 return self->extra->children[index];
1195}
1196
1197static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198element_insert(ElementObject* self, PyObject* args)
1199{
1200 int i;
1201
1202 int index;
1203 PyObject* element;
1204 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1205 &Element_Type, &element))
1206 return NULL;
1207
1208 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001209 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001211 if (index < 0) {
1212 index += self->extra->length;
1213 if (index < 0)
1214 index = 0;
1215 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001216 if (index > self->extra->length)
1217 index = self->extra->length;
1218
1219 if (element_resize(self, 1) < 0)
1220 return NULL;
1221
1222 for (i = self->extra->length; i > index; i--)
1223 self->extra->children[i] = self->extra->children[i-1];
1224
1225 Py_INCREF(element);
1226 self->extra->children[index] = element;
1227
1228 self->extra->length++;
1229
1230 Py_RETURN_NONE;
1231}
1232
1233static PyObject*
1234element_items(ElementObject* self, PyObject* args)
1235{
1236 if (!PyArg_ParseTuple(args, ":items"))
1237 return NULL;
1238
1239 if (!self->extra || self->extra->attrib == Py_None)
1240 return PyList_New(0);
1241
1242 return PyDict_Items(self->extra->attrib);
1243}
1244
1245static PyObject*
1246element_keys(ElementObject* self, PyObject* args)
1247{
1248 if (!PyArg_ParseTuple(args, ":keys"))
1249 return NULL;
1250
1251 if (!self->extra || self->extra->attrib == Py_None)
1252 return PyList_New(0);
1253
1254 return PyDict_Keys(self->extra->attrib);
1255}
1256
Martin v. Löwis18e16552006-02-15 17:27:45 +00001257static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258element_length(ElementObject* self)
1259{
1260 if (!self->extra)
1261 return 0;
1262
1263 return self->extra->length;
1264}
1265
1266static PyObject*
1267element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1268{
1269 PyObject* elem;
1270
1271 PyObject* tag;
1272 PyObject* attrib;
1273 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1274 return NULL;
1275
1276 attrib = PyDict_Copy(attrib);
1277 if (!attrib)
1278 return NULL;
1279
Eli Bendersky092af1f2012-03-04 07:14:03 +02001280 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281
1282 Py_DECREF(attrib);
1283
1284 return elem;
1285}
1286
1287static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288element_remove(ElementObject* self, PyObject* args)
1289{
1290 int i;
1291
1292 PyObject* element;
1293 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1294 return NULL;
1295
1296 if (!self->extra) {
1297 /* element has no children, so raise exception */
1298 PyErr_SetString(
1299 PyExc_ValueError,
1300 "list.remove(x): x not in list"
1301 );
1302 return NULL;
1303 }
1304
1305 for (i = 0; i < self->extra->length; i++) {
1306 if (self->extra->children[i] == element)
1307 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001308 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309 break;
1310 }
1311
1312 if (i == self->extra->length) {
1313 /* element is not in children, so raise exception */
1314 PyErr_SetString(
1315 PyExc_ValueError,
1316 "list.remove(x): x not in list"
1317 );
1318 return NULL;
1319 }
1320
1321 Py_DECREF(self->extra->children[i]);
1322
1323 self->extra->length--;
1324
1325 for (; i < self->extra->length; i++)
1326 self->extra->children[i] = self->extra->children[i+1];
1327
1328 Py_RETURN_NONE;
1329}
1330
1331static PyObject*
1332element_repr(ElementObject* self)
1333{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001334 if (self->tag)
1335 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1336 else
1337 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338}
1339
1340static PyObject*
1341element_set(ElementObject* self, PyObject* args)
1342{
1343 PyObject* attrib;
1344
1345 PyObject* key;
1346 PyObject* value;
1347 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1348 return NULL;
1349
1350 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001351 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 attrib = element_get_attrib(self);
1354 if (!attrib)
1355 return NULL;
1356
1357 if (PyDict_SetItem(attrib, key, value) < 0)
1358 return NULL;
1359
1360 Py_RETURN_NONE;
1361}
1362
1363static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001364element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001366 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 int i;
1368 PyObject* old;
1369
1370 if (!self->extra || index < 0 || index >= self->extra->length) {
1371 PyErr_SetString(
1372 PyExc_IndexError,
1373 "child assignment index out of range");
1374 return -1;
1375 }
1376
1377 old = self->extra->children[index];
1378
1379 if (item) {
1380 Py_INCREF(item);
1381 self->extra->children[index] = item;
1382 } else {
1383 self->extra->length--;
1384 for (i = index; i < self->extra->length; i++)
1385 self->extra->children[i] = self->extra->children[i+1];
1386 }
1387
1388 Py_DECREF(old);
1389
1390 return 0;
1391}
1392
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393static PyObject*
1394element_subscr(PyObject* self_, PyObject* item)
1395{
1396 ElementObject* self = (ElementObject*) self_;
1397
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398 if (PyIndex_Check(item)) {
1399 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001400
1401 if (i == -1 && PyErr_Occurred()) {
1402 return NULL;
1403 }
1404 if (i < 0 && self->extra)
1405 i += self->extra->length;
1406 return element_getitem(self_, i);
1407 }
1408 else if (PySlice_Check(item)) {
1409 Py_ssize_t start, stop, step, slicelen, cur, i;
1410 PyObject* list;
1411
1412 if (!self->extra)
1413 return PyList_New(0);
1414
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001415 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416 self->extra->length,
1417 &start, &stop, &step, &slicelen) < 0) {
1418 return NULL;
1419 }
1420
1421 if (slicelen <= 0)
1422 return PyList_New(0);
1423 else {
1424 list = PyList_New(slicelen);
1425 if (!list)
1426 return NULL;
1427
1428 for (cur = start, i = 0; i < slicelen;
1429 cur += step, i++) {
1430 PyObject* item = self->extra->children[cur];
1431 Py_INCREF(item);
1432 PyList_SET_ITEM(list, i, item);
1433 }
1434
1435 return list;
1436 }
1437 }
1438 else {
1439 PyErr_SetString(PyExc_TypeError,
1440 "element indices must be integers");
1441 return NULL;
1442 }
1443}
1444
1445static int
1446element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1447{
1448 ElementObject* self = (ElementObject*) self_;
1449
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001450 if (PyIndex_Check(item)) {
1451 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001452
1453 if (i == -1 && PyErr_Occurred()) {
1454 return -1;
1455 }
1456 if (i < 0 && self->extra)
1457 i += self->extra->length;
1458 return element_setitem(self_, i, value);
1459 }
1460 else if (PySlice_Check(item)) {
1461 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1462
1463 PyObject* recycle = NULL;
1464 PyObject* seq = NULL;
1465
1466 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001467 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001469 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001470 self->extra->length,
1471 &start, &stop, &step, &slicelen) < 0) {
1472 return -1;
1473 }
1474
Eli Bendersky865756a2012-03-09 13:38:15 +02001475 if (value == NULL) {
1476 /* Delete slice */
1477 size_t cur;
1478 Py_ssize_t i;
1479
1480 if (slicelen <= 0)
1481 return 0;
1482
1483 /* Since we're deleting, the direction of the range doesn't matter,
1484 * so for simplicity make it always ascending.
1485 */
1486 if (step < 0) {
1487 stop = start + 1;
1488 start = stop + step * (slicelen - 1) - 1;
1489 step = -step;
1490 }
1491
1492 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1493
1494 /* recycle is a list that will contain all the children
1495 * scheduled for removal.
1496 */
1497 if (!(recycle = PyList_New(slicelen))) {
1498 PyErr_NoMemory();
1499 return -1;
1500 }
1501
1502 /* This loop walks over all the children that have to be deleted,
1503 * with cur pointing at them. num_moved is the amount of children
1504 * until the next deleted child that have to be "shifted down" to
1505 * occupy the deleted's places.
1506 * Note that in the ith iteration, shifting is done i+i places down
1507 * because i children were already removed.
1508 */
1509 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1510 /* Compute how many children have to be moved, clipping at the
1511 * list end.
1512 */
1513 Py_ssize_t num_moved = step - 1;
1514 if (cur + step >= (size_t)self->extra->length) {
1515 num_moved = self->extra->length - cur - 1;
1516 }
1517
1518 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1519
1520 memmove(
1521 self->extra->children + cur - i,
1522 self->extra->children + cur + 1,
1523 num_moved * sizeof(PyObject *));
1524 }
1525
1526 /* Leftover "tail" after the last removed child */
1527 cur = start + (size_t)slicelen * step;
1528 if (cur < (size_t)self->extra->length) {
1529 memmove(
1530 self->extra->children + cur - slicelen,
1531 self->extra->children + cur,
1532 (self->extra->length - cur) * sizeof(PyObject *));
1533 }
1534
1535 self->extra->length -= slicelen;
1536
1537 /* Discard the recycle list with all the deleted sub-elements */
1538 Py_XDECREF(recycle);
1539 return 0;
1540 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001541 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001542 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 seq = PySequence_Fast(value, "");
1544 if (!seq) {
1545 PyErr_Format(
1546 PyExc_TypeError,
1547 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1548 );
1549 return -1;
1550 }
1551 newlen = PySequence_Size(seq);
1552 }
1553
1554 if (step != 1 && newlen != slicelen)
1555 {
1556 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 "attempt to assign sequence of size %zd "
1558 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559 newlen, slicelen
1560 );
1561 return -1;
1562 }
1563
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001564 /* Resize before creating the recycle bin, to prevent refleaks. */
1565 if (newlen > slicelen) {
1566 if (element_resize(self, newlen - slicelen) < 0) {
1567 if (seq) {
1568 Py_DECREF(seq);
1569 }
1570 return -1;
1571 }
1572 }
1573
1574 if (slicelen > 0) {
1575 /* to avoid recursive calls to this method (via decref), move
1576 old items to the recycle bin here, and get rid of them when
1577 we're done modifying the element */
1578 recycle = PyList_New(slicelen);
1579 if (!recycle) {
1580 if (seq) {
1581 Py_DECREF(seq);
1582 }
1583 return -1;
1584 }
1585 for (cur = start, i = 0; i < slicelen;
1586 cur += step, i++)
1587 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1588 }
1589
1590 if (newlen < slicelen) {
1591 /* delete slice */
1592 for (i = stop; i < self->extra->length; i++)
1593 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1594 } else if (newlen > slicelen) {
1595 /* insert slice */
1596 for (i = self->extra->length-1; i >= stop; i--)
1597 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1598 }
1599
1600 /* replace the slice */
1601 for (cur = start, i = 0; i < newlen;
1602 cur += step, i++) {
1603 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1604 Py_INCREF(element);
1605 self->extra->children[cur] = element;
1606 }
1607
1608 self->extra->length += newlen - slicelen;
1609
1610 if (seq) {
1611 Py_DECREF(seq);
1612 }
1613
1614 /* discard the recycle bin, and everything in it */
1615 Py_XDECREF(recycle);
1616
1617 return 0;
1618 }
1619 else {
1620 PyErr_SetString(PyExc_TypeError,
1621 "element indices must be integers");
1622 return -1;
1623 }
1624}
1625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626static PyMethodDef element_methods[] = {
1627
Eli Bendersky0192ba32012-03-30 16:38:33 +03001628 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629
1630 {"get", (PyCFunction) element_get, METH_VARARGS},
1631 {"set", (PyCFunction) element_set, METH_VARARGS},
1632
Eli Bendersky737b1732012-05-29 06:02:56 +03001633 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1634 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1635 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001638 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1640 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1641
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001642 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1643 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001644 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001645
1646 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1648
1649 {"items", (PyCFunction) element_items, METH_VARARGS},
1650 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1651
1652 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1653
1654 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1655 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 {NULL, NULL}
1658};
1659
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001660static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001661element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662{
1663 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001664 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001666 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001668
Alexander Belopolskye239d232010-12-08 23:31:48 +00001669 if (name == NULL)
1670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672 /* handle common attributes first */
1673 if (strcmp(name, "tag") == 0) {
1674 res = self->tag;
1675 Py_INCREF(res);
1676 return res;
1677 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001679 Py_INCREF(res);
1680 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001681 }
1682
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683 /* methods */
1684 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1685 if (res)
1686 return res;
1687
1688 /* less common attributes */
1689 if (strcmp(name, "tail") == 0) {
1690 PyErr_Clear();
1691 res = element_get_tail(self);
1692 } else if (strcmp(name, "attrib") == 0) {
1693 PyErr_Clear();
1694 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001695 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001696 res = element_get_attrib(self);
1697 }
1698
1699 if (!res)
1700 return NULL;
1701
1702 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001703 return res;
1704}
1705
Eli Benderskyb20df952012-05-20 06:33:29 +03001706static PyObject*
1707element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708{
Eli Benderskyb20df952012-05-20 06:33:29 +03001709 char *name = "";
1710 if (PyUnicode_Check(nameobj))
1711 name = _PyUnicode_AsString(nameobj);
1712
1713 if (name == NULL)
1714 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001715
1716 if (strcmp(name, "tag") == 0) {
1717 Py_DECREF(self->tag);
1718 self->tag = value;
1719 Py_INCREF(self->tag);
1720 } else if (strcmp(name, "text") == 0) {
1721 Py_DECREF(JOIN_OBJ(self->text));
1722 self->text = value;
1723 Py_INCREF(self->text);
1724 } else if (strcmp(name, "tail") == 0) {
1725 Py_DECREF(JOIN_OBJ(self->tail));
1726 self->tail = value;
1727 Py_INCREF(self->tail);
1728 } else if (strcmp(name, "attrib") == 0) {
1729 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001730 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731 Py_DECREF(self->extra->attrib);
1732 self->extra->attrib = value;
1733 Py_INCREF(self->extra->attrib);
1734 } else {
1735 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001736 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001737 }
1738
Eli Benderskyb20df952012-05-20 06:33:29 +03001739 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740}
1741
1742static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744 0, /* sq_concat */
1745 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001746 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001748 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749 0,
1750};
1751
1752static PyMappingMethods element_as_mapping = {
1753 (lenfunc) element_length,
1754 (binaryfunc) element_subscr,
1755 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756};
1757
Neal Norwitz227b5332006-03-22 09:28:35 +00001758static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001759 PyVarObject_HEAD_INIT(NULL, 0)
1760 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001761 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001762 (destructor)element_dealloc, /* tp_dealloc */
1763 0, /* tp_print */
1764 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001765 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001766 0, /* tp_reserved */
1767 (reprfunc)element_repr, /* tp_repr */
1768 0, /* tp_as_number */
1769 &element_as_sequence, /* tp_as_sequence */
1770 &element_as_mapping, /* tp_as_mapping */
1771 0, /* tp_hash */
1772 0, /* tp_call */
1773 0, /* tp_str */
1774 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001775 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001776 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1778 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001779 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001780 (traverseproc)element_gc_traverse, /* tp_traverse */
1781 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001782 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001783 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001784 0, /* tp_iter */
1785 0, /* tp_iternext */
1786 element_methods, /* tp_methods */
1787 0, /* tp_members */
1788 0, /* tp_getset */
1789 0, /* tp_base */
1790 0, /* tp_dict */
1791 0, /* tp_descr_get */
1792 0, /* tp_descr_set */
1793 0, /* tp_dictoffset */
1794 (initproc)element_init, /* tp_init */
1795 PyType_GenericAlloc, /* tp_alloc */
1796 element_new, /* tp_new */
1797 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001798};
1799
1800/* ==================================================================== */
1801/* the tree builder type */
1802
1803typedef struct {
1804 PyObject_HEAD
1805
1806 PyObject* root; /* root node (first created node) */
1807
1808 ElementObject* this; /* current node */
1809 ElementObject* last; /* most recently created node */
1810
1811 PyObject* data; /* data collector (string or list), or NULL */
1812
1813 PyObject* stack; /* element stack */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001814 Py_ssize_t index; /* current stack size (0=empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001815
1816 /* element tracing */
1817 PyObject* events; /* list of events, or NULL if not collecting */
1818 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1819 PyObject* end_event_obj;
1820 PyObject* start_ns_event_obj;
1821 PyObject* end_ns_event_obj;
1822
1823} TreeBuilderObject;
1824
Neal Norwitz227b5332006-03-22 09:28:35 +00001825static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826
Christian Heimes90aa7642007-12-19 02:45:37 +00001827#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001828
1829/* -------------------------------------------------------------------- */
1830/* constructor and destructor */
1831
1832LOCAL(PyObject*)
1833treebuilder_new(void)
1834{
1835 TreeBuilderObject* self;
1836
1837 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1838 if (self == NULL)
1839 return NULL;
1840
1841 self->root = NULL;
1842
1843 Py_INCREF(Py_None);
1844 self->this = (ElementObject*) Py_None;
1845
1846 Py_INCREF(Py_None);
1847 self->last = (ElementObject*) Py_None;
1848
1849 self->data = NULL;
1850
1851 self->stack = PyList_New(20);
1852 self->index = 0;
1853
1854 self->events = NULL;
1855 self->start_event_obj = self->end_event_obj = NULL;
1856 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1857
1858 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1859
1860 return (PyObject*) self;
1861}
1862
1863static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00001864treebuilder(PyObject* self_, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001865{
1866 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1867 return NULL;
1868
1869 return treebuilder_new();
1870}
1871
1872static void
1873treebuilder_dealloc(TreeBuilderObject* self)
1874{
1875 Py_XDECREF(self->end_ns_event_obj);
1876 Py_XDECREF(self->start_ns_event_obj);
1877 Py_XDECREF(self->end_event_obj);
1878 Py_XDECREF(self->start_event_obj);
1879 Py_XDECREF(self->events);
1880 Py_DECREF(self->stack);
1881 Py_XDECREF(self->data);
1882 Py_DECREF(self->last);
1883 Py_DECREF(self->this);
1884 Py_XDECREF(self->root);
1885
1886 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1887
1888 PyObject_Del(self);
1889}
1890
1891/* -------------------------------------------------------------------- */
1892/* handlers */
1893
1894LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001895treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1896 PyObject* attrib)
1897{
1898 PyObject* node;
1899 PyObject* this;
1900
1901 if (self->data) {
1902 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001903 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 self->last->text = JOIN_SET(
1905 self->data, PyList_CheckExact(self->data)
1906 );
1907 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001908 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909 self->last->tail = JOIN_SET(
1910 self->data, PyList_CheckExact(self->data)
1911 );
1912 }
1913 self->data = NULL;
1914 }
1915
Eli Bendersky092af1f2012-03-04 07:14:03 +02001916 node = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001917 if (!node)
1918 return NULL;
1919
1920 this = (PyObject*) self->this;
1921
1922 if (this != Py_None) {
1923 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001924 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925 } else {
1926 if (self->root) {
1927 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001928 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929 "multiple elements on top level"
1930 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001931 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 }
1933 Py_INCREF(node);
1934 self->root = node;
1935 }
1936
1937 if (self->index < PyList_GET_SIZE(self->stack)) {
1938 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001939 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001940 Py_INCREF(this);
1941 } else {
1942 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001943 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944 }
1945 self->index++;
1946
1947 Py_DECREF(this);
1948 Py_INCREF(node);
1949 self->this = (ElementObject*) node;
1950
1951 Py_DECREF(self->last);
1952 Py_INCREF(node);
1953 self->last = (ElementObject*) node;
1954
1955 if (self->start_event_obj) {
1956 PyObject* res;
1957 PyObject* action = self->start_event_obj;
1958 res = PyTuple_New(2);
1959 if (res) {
1960 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1961 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1962 PyList_Append(self->events, res);
1963 Py_DECREF(res);
1964 } else
1965 PyErr_Clear(); /* FIXME: propagate error */
1966 }
1967
1968 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001969
1970 error:
1971 Py_DECREF(node);
1972 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001973}
1974
1975LOCAL(PyObject*)
1976treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1977{
1978 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001979 if (self->last == (ElementObject*) Py_None) {
1980 /* ignore calls to data before the first call to start */
1981 Py_RETURN_NONE;
1982 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001983 /* store the first item as is */
1984 Py_INCREF(data); self->data = data;
1985 } else {
1986 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00001987 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1988 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001989 /* expat often generates single character data sections; handle
1990 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00001991 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1992 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001993 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00001994 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001995 } else if (PyList_CheckExact(self->data)) {
1996 if (PyList_Append(self->data, data) < 0)
1997 return NULL;
1998 } else {
1999 PyObject* list = PyList_New(2);
2000 if (!list)
2001 return NULL;
2002 PyList_SET_ITEM(list, 0, self->data);
2003 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2004 self->data = list;
2005 }
2006 }
2007
2008 Py_RETURN_NONE;
2009}
2010
2011LOCAL(PyObject*)
2012treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2013{
2014 PyObject* item;
2015
2016 if (self->data) {
2017 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002018 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019 self->last->text = JOIN_SET(
2020 self->data, PyList_CheckExact(self->data)
2021 );
2022 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002023 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002024 self->last->tail = JOIN_SET(
2025 self->data, PyList_CheckExact(self->data)
2026 );
2027 }
2028 self->data = NULL;
2029 }
2030
2031 if (self->index == 0) {
2032 PyErr_SetString(
2033 PyExc_IndexError,
2034 "pop from empty stack"
2035 );
2036 return NULL;
2037 }
2038
2039 self->index--;
2040
2041 item = PyList_GET_ITEM(self->stack, self->index);
2042 Py_INCREF(item);
2043
2044 Py_DECREF(self->last);
2045
2046 self->last = (ElementObject*) self->this;
2047 self->this = (ElementObject*) item;
2048
2049 if (self->end_event_obj) {
2050 PyObject* res;
2051 PyObject* action = self->end_event_obj;
2052 PyObject* node = (PyObject*) self->last;
2053 res = PyTuple_New(2);
2054 if (res) {
2055 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2056 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2057 PyList_Append(self->events, res);
2058 Py_DECREF(res);
2059 } else
2060 PyErr_Clear(); /* FIXME: propagate error */
2061 }
2062
2063 Py_INCREF(self->last);
2064 return (PyObject*) self->last;
2065}
2066
2067LOCAL(void)
2068treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002069 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070{
2071 PyObject* res;
2072 PyObject* action;
2073 PyObject* parcel;
2074
2075 if (!self->events)
2076 return;
2077
2078 if (start) {
2079 if (!self->start_ns_event_obj)
2080 return;
2081 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002082 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002083 if (!parcel)
2084 return;
2085 Py_INCREF(action);
2086 } else {
2087 if (!self->end_ns_event_obj)
2088 return;
2089 action = self->end_ns_event_obj;
2090 Py_INCREF(action);
2091 parcel = Py_None;
2092 Py_INCREF(parcel);
2093 }
2094
2095 res = PyTuple_New(2);
2096
2097 if (res) {
2098 PyTuple_SET_ITEM(res, 0, action);
2099 PyTuple_SET_ITEM(res, 1, parcel);
2100 PyList_Append(self->events, res);
2101 Py_DECREF(res);
2102 } else
2103 PyErr_Clear(); /* FIXME: propagate error */
2104}
2105
2106/* -------------------------------------------------------------------- */
2107/* methods (in alphabetical order) */
2108
2109static PyObject*
2110treebuilder_data(TreeBuilderObject* self, PyObject* args)
2111{
2112 PyObject* data;
2113 if (!PyArg_ParseTuple(args, "O:data", &data))
2114 return NULL;
2115
2116 return treebuilder_handle_data(self, data);
2117}
2118
2119static PyObject*
2120treebuilder_end(TreeBuilderObject* self, PyObject* args)
2121{
2122 PyObject* tag;
2123 if (!PyArg_ParseTuple(args, "O:end", &tag))
2124 return NULL;
2125
2126 return treebuilder_handle_end(self, tag);
2127}
2128
2129LOCAL(PyObject*)
2130treebuilder_done(TreeBuilderObject* self)
2131{
2132 PyObject* res;
2133
2134 /* FIXME: check stack size? */
2135
2136 if (self->root)
2137 res = self->root;
2138 else
2139 res = Py_None;
2140
2141 Py_INCREF(res);
2142 return res;
2143}
2144
2145static PyObject*
2146treebuilder_close(TreeBuilderObject* self, PyObject* args)
2147{
2148 if (!PyArg_ParseTuple(args, ":close"))
2149 return NULL;
2150
2151 return treebuilder_done(self);
2152}
2153
2154static PyObject*
2155treebuilder_start(TreeBuilderObject* self, PyObject* args)
2156{
2157 PyObject* tag;
2158 PyObject* attrib = Py_None;
2159 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2160 return NULL;
2161
2162 return treebuilder_handle_start(self, tag, attrib);
2163}
2164
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002165static PyMethodDef treebuilder_methods[] = {
2166 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2167 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2168 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002169 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2170 {NULL, NULL}
2171};
2172
Neal Norwitz227b5332006-03-22 09:28:35 +00002173static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002174 PyVarObject_HEAD_INIT(NULL, 0)
2175 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176 /* methods */
2177 (destructor)treebuilder_dealloc, /* tp_dealloc */
2178 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002179 0, /* tp_getattr */
2180 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002181 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00002182 0, /* tp_repr */
2183 0, /* tp_as_number */
2184 0, /* tp_as_sequence */
2185 0, /* tp_as_mapping */
2186 0, /* tp_hash */
2187 0, /* tp_call */
2188 0, /* tp_str */
2189 0, /* tp_getattro */
2190 0, /* tp_setattro */
2191 0, /* tp_as_buffer */
2192 Py_TPFLAGS_DEFAULT, /* tp_flags */
2193 0, /* tp_doc */
2194 0, /* tp_traverse */
2195 0, /* tp_clear */
2196 0, /* tp_richcompare */
2197 0, /* tp_weaklistoffset */
2198 0, /* tp_iter */
2199 0, /* tp_iternext */
2200 treebuilder_methods, /* tp_methods */
2201 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002202};
2203
2204/* ==================================================================== */
2205/* the expat interface */
2206
2207#if defined(USE_EXPAT)
2208
2209#include "expat.h"
2210
2211#if defined(USE_PYEXPAT_CAPI)
2212#include "pyexpat.h"
2213static struct PyExpat_CAPI* expat_capi;
2214#define EXPAT(func) (expat_capi->func)
2215#else
2216#define EXPAT(func) (XML_##func)
2217#endif
2218
2219typedef struct {
2220 PyObject_HEAD
2221
2222 XML_Parser parser;
2223
2224 PyObject* target;
2225 PyObject* entity;
2226
2227 PyObject* names;
2228
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002229 PyObject* handle_start;
2230 PyObject* handle_data;
2231 PyObject* handle_end;
2232
2233 PyObject* handle_comment;
2234 PyObject* handle_pi;
2235
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002236 PyObject* handle_close;
2237
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002238} XMLParserObject;
2239
Neal Norwitz227b5332006-03-22 09:28:35 +00002240static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002241
2242/* helpers */
2243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244LOCAL(PyObject*)
2245makeuniversal(XMLParserObject* self, const char* string)
2246{
2247 /* convert a UTF-8 tag/attribute name from the expat parser
2248 to a universal name string */
2249
2250 int size = strlen(string);
2251 PyObject* key;
2252 PyObject* value;
2253
2254 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002255 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002256 if (!key)
2257 return NULL;
2258
2259 value = PyDict_GetItem(self->names, key);
2260
2261 if (value) {
2262 Py_INCREF(value);
2263 } else {
2264 /* new name. convert to universal name, and decode as
2265 necessary */
2266
2267 PyObject* tag;
2268 char* p;
2269 int i;
2270
2271 /* look for namespace separator */
2272 for (i = 0; i < size; i++)
2273 if (string[i] == '}')
2274 break;
2275 if (i != size) {
2276 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002277 tag = PyBytes_FromStringAndSize(NULL, size+1);
2278 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279 p[0] = '{';
2280 memcpy(p+1, string, size);
2281 size++;
2282 } else {
2283 /* plain name; use key as tag */
2284 Py_INCREF(key);
2285 tag = key;
2286 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002289 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002290 value = PyUnicode_DecodeUTF8(p, size, "strict");
2291 Py_DECREF(tag);
2292 if (!value) {
2293 Py_DECREF(key);
2294 return NULL;
2295 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296
2297 /* add to names dictionary */
2298 if (PyDict_SetItem(self->names, key, value) < 0) {
2299 Py_DECREF(key);
2300 Py_DECREF(value);
2301 return NULL;
2302 }
2303 }
2304
2305 Py_DECREF(key);
2306 return value;
2307}
2308
Eli Bendersky5b77d812012-03-16 08:20:05 +02002309/* Set the ParseError exception with the given parameters.
2310 * If message is not NULL, it's used as the error string. Otherwise, the
2311 * message string is the default for the given error_code.
2312*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002313static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002314expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002315{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002316 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002317
Victor Stinner499dfcf2011-03-21 13:26:24 +01002318 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002319 message ? message : EXPAT(ErrorString)(error_code),
2320 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002321 if (errmsg == NULL)
2322 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002323
Victor Stinner499dfcf2011-03-21 13:26:24 +01002324 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2325 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002326 if (!error)
2327 return;
2328
Eli Bendersky5b77d812012-03-16 08:20:05 +02002329 /* Add code and position attributes */
2330 code = PyLong_FromLong((long)error_code);
2331 if (!code) {
2332 Py_DECREF(error);
2333 return;
2334 }
2335 if (PyObject_SetAttrString(error, "code", code) == -1) {
2336 Py_DECREF(error);
2337 Py_DECREF(code);
2338 return;
2339 }
2340 Py_DECREF(code);
2341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002342 position = Py_BuildValue("(ii)", line, column);
2343 if (!position) {
2344 Py_DECREF(error);
2345 return;
2346 }
2347 if (PyObject_SetAttrString(error, "position", position) == -1) {
2348 Py_DECREF(error);
2349 Py_DECREF(position);
2350 return;
2351 }
2352 Py_DECREF(position);
2353
2354 PyErr_SetObject(elementtree_parseerror_obj, error);
2355 Py_DECREF(error);
2356}
2357
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358/* -------------------------------------------------------------------- */
2359/* handlers */
2360
2361static void
2362expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2363 int data_len)
2364{
2365 PyObject* key;
2366 PyObject* value;
2367 PyObject* res;
2368
2369 if (data_len < 2 || data_in[0] != '&')
2370 return;
2371
Neal Norwitz0269b912007-08-08 06:56:02 +00002372 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373 if (!key)
2374 return;
2375
2376 value = PyDict_GetItem(self->entity, key);
2377
2378 if (value) {
2379 if (TreeBuilder_CheckExact(self->target))
2380 res = treebuilder_handle_data(
2381 (TreeBuilderObject*) self->target, value
2382 );
2383 else if (self->handle_data)
2384 res = PyObject_CallFunction(self->handle_data, "O", value);
2385 else
2386 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002388 } else if (!PyErr_Occurred()) {
2389 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002390 char message[128] = "undefined entity ";
2391 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002392 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002393 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002395 EXPAT(GetErrorColumnNumber)(self->parser),
2396 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397 );
2398 }
2399
2400 Py_DECREF(key);
2401}
2402
2403static void
2404expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2405 const XML_Char **attrib_in)
2406{
2407 PyObject* res;
2408 PyObject* tag;
2409 PyObject* attrib;
2410 int ok;
2411
2412 /* tag name */
2413 tag = makeuniversal(self, tag_in);
2414 if (!tag)
2415 return; /* parser will look for errors */
2416
2417 /* attributes */
2418 if (attrib_in[0]) {
2419 attrib = PyDict_New();
2420 if (!attrib)
2421 return;
2422 while (attrib_in[0] && attrib_in[1]) {
2423 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002424 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425 if (!key || !value) {
2426 Py_XDECREF(value);
2427 Py_XDECREF(key);
2428 Py_DECREF(attrib);
2429 return;
2430 }
2431 ok = PyDict_SetItem(attrib, key, value);
2432 Py_DECREF(value);
2433 Py_DECREF(key);
2434 if (ok < 0) {
2435 Py_DECREF(attrib);
2436 return;
2437 }
2438 attrib_in += 2;
2439 }
2440 } else {
2441 Py_INCREF(Py_None);
2442 attrib = Py_None;
2443 }
2444
2445 if (TreeBuilder_CheckExact(self->target))
2446 /* shortcut */
2447 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2448 tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002449 else if (self->handle_start) {
2450 if (attrib == Py_None) {
2451 Py_DECREF(attrib);
2452 attrib = PyDict_New();
2453 if (!attrib)
2454 return;
2455 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002457 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002458 res = NULL;
2459
2460 Py_DECREF(tag);
2461 Py_DECREF(attrib);
2462
2463 Py_XDECREF(res);
2464}
2465
2466static void
2467expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2468 int data_len)
2469{
2470 PyObject* data;
2471 PyObject* res;
2472
Neal Norwitz0269b912007-08-08 06:56:02 +00002473 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002474 if (!data)
2475 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476
2477 if (TreeBuilder_CheckExact(self->target))
2478 /* shortcut */
2479 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2480 else if (self->handle_data)
2481 res = PyObject_CallFunction(self->handle_data, "O", data);
2482 else
2483 res = NULL;
2484
2485 Py_DECREF(data);
2486
2487 Py_XDECREF(res);
2488}
2489
2490static void
2491expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2492{
2493 PyObject* tag;
2494 PyObject* res = NULL;
2495
2496 if (TreeBuilder_CheckExact(self->target))
2497 /* shortcut */
2498 /* the standard tree builder doesn't look at the end tag */
2499 res = treebuilder_handle_end(
2500 (TreeBuilderObject*) self->target, Py_None
2501 );
2502 else if (self->handle_end) {
2503 tag = makeuniversal(self, tag_in);
2504 if (tag) {
2505 res = PyObject_CallFunction(self->handle_end, "O", tag);
2506 Py_DECREF(tag);
2507 }
2508 }
2509
2510 Py_XDECREF(res);
2511}
2512
2513static void
2514expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2515 const XML_Char *uri)
2516{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002517 PyObject* sprefix = NULL;
2518 PyObject* suri = NULL;
2519
2520 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2521 if (!suri)
2522 return;
2523
2524 if (prefix)
2525 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2526 else
2527 sprefix = PyUnicode_FromString("");
2528 if (!sprefix) {
2529 Py_DECREF(suri);
2530 return;
2531 }
2532
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002534 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002536
2537 Py_DECREF(sprefix);
2538 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539}
2540
2541static void
2542expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2543{
2544 treebuilder_handle_namespace(
2545 (TreeBuilderObject*) self->target, 0, NULL, NULL
2546 );
2547}
2548
2549static void
2550expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2551{
2552 PyObject* comment;
2553 PyObject* res;
2554
2555 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002556 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 if (comment) {
2558 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2559 Py_XDECREF(res);
2560 Py_DECREF(comment);
2561 }
2562 }
2563}
2564
2565static void
2566expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2567 const XML_Char* data_in)
2568{
2569 PyObject* target;
2570 PyObject* data;
2571 PyObject* res;
2572
2573 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002574 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2575 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 if (target && data) {
2577 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2578 Py_XDECREF(res);
2579 Py_DECREF(data);
2580 Py_DECREF(target);
2581 } else {
2582 Py_XDECREF(data);
2583 Py_XDECREF(target);
2584 }
2585 }
2586}
2587
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588static int
2589expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2590 XML_Encoding *info)
2591{
2592 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593 unsigned char s[256];
2594 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002595 void *data;
2596 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597
2598 memset(info, 0, sizeof(XML_Encoding));
2599
2600 for (i = 0; i < 256; i++)
2601 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002602
Fredrik Lundhc3389992005-12-25 11:40:19 +00002603 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 if (!u)
2605 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002606 if (PyUnicode_READY(u))
2607 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002609 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 Py_DECREF(u);
2611 return XML_STATUS_ERROR;
2612 }
2613
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002614 kind = PyUnicode_KIND(u);
2615 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002617 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2618 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2619 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002621 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622 }
2623
2624 Py_DECREF(u);
2625
2626 return XML_STATUS_OK;
2627}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628
2629/* -------------------------------------------------------------------- */
2630/* constructor and destructor */
2631
2632static PyObject*
Thomas Wouters73e5a5b2006-06-08 15:35:45 +00002633xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634{
2635 XMLParserObject* self;
2636 /* FIXME: does this need to be static? */
2637 static XML_Memory_Handling_Suite memory_handler;
2638
2639 PyObject* target = NULL;
2640 char* encoding = NULL;
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +00002641 static char* kwlist[] = { "target", "encoding", NULL };
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2643 &target, &encoding))
2644 return NULL;
2645
2646#if defined(USE_PYEXPAT_CAPI)
2647 if (!expat_capi) {
2648 PyErr_SetString(
2649 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2650 );
2651 return NULL;
2652 }
2653#endif
2654
2655 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2656 if (self == NULL)
2657 return NULL;
2658
2659 self->entity = PyDict_New();
2660 if (!self->entity) {
2661 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002662 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002665 self->names = PyDict_New();
2666 if (!self->names) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002667 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002669 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 }
2671
2672 memory_handler.malloc_fcn = PyObject_Malloc;
2673 memory_handler.realloc_fcn = PyObject_Realloc;
2674 memory_handler.free_fcn = PyObject_Free;
2675
2676 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2677 if (!self->parser) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002678 PyObject_Del(self->names);
2679 PyObject_Del(self->entity);
2680 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681 PyErr_NoMemory();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002682 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683 }
2684
2685 /* setup target handlers */
2686 if (!target) {
2687 target = treebuilder_new();
2688 if (!target) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002689 EXPAT(ParserFree)(self->parser);
2690 PyObject_Del(self->names);
2691 PyObject_Del(self->entity);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692 PyObject_Del(self);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002693 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694 }
2695 } else
2696 Py_INCREF(target);
2697 self->target = target;
2698
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699 self->handle_start = PyObject_GetAttrString(target, "start");
2700 self->handle_data = PyObject_GetAttrString(target, "data");
2701 self->handle_end = PyObject_GetAttrString(target, "end");
2702 self->handle_comment = PyObject_GetAttrString(target, "comment");
2703 self->handle_pi = PyObject_GetAttrString(target, "pi");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002704 self->handle_close = PyObject_GetAttrString(target, "close");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
2706 PyErr_Clear();
2707
2708 /* configure parser */
2709 EXPAT(SetUserData)(self->parser, self);
2710 EXPAT(SetElementHandler)(
2711 self->parser,
2712 (XML_StartElementHandler) expat_start_handler,
2713 (XML_EndElementHandler) expat_end_handler
2714 );
2715 EXPAT(SetDefaultHandlerExpand)(
2716 self->parser,
2717 (XML_DefaultHandler) expat_default_handler
2718 );
2719 EXPAT(SetCharacterDataHandler)(
2720 self->parser,
2721 (XML_CharacterDataHandler) expat_data_handler
2722 );
2723 if (self->handle_comment)
2724 EXPAT(SetCommentHandler)(
2725 self->parser,
2726 (XML_CommentHandler) expat_comment_handler
2727 );
2728 if (self->handle_pi)
2729 EXPAT(SetProcessingInstructionHandler)(
2730 self->parser,
2731 (XML_ProcessingInstructionHandler) expat_pi_handler
2732 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733 EXPAT(SetUnknownEncodingHandler)(
2734 self->parser,
2735 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2736 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737
2738 ALLOC(sizeof(XMLParserObject), "create expatparser");
2739
2740 return (PyObject*) self;
2741}
2742
2743static void
2744xmlparser_dealloc(XMLParserObject* self)
2745{
2746 EXPAT(ParserFree)(self->parser);
2747
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002748 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 Py_XDECREF(self->handle_pi);
2750 Py_XDECREF(self->handle_comment);
2751 Py_XDECREF(self->handle_end);
2752 Py_XDECREF(self->handle_data);
2753 Py_XDECREF(self->handle_start);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754
2755 Py_DECREF(self->target);
2756 Py_DECREF(self->entity);
2757 Py_DECREF(self->names);
2758
2759 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2760
2761 PyObject_Del(self);
2762}
2763
2764/* -------------------------------------------------------------------- */
2765/* methods (in alphabetical order) */
2766
2767LOCAL(PyObject*)
2768expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2769{
2770 int ok;
2771
2772 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2773
2774 if (PyErr_Occurred())
2775 return NULL;
2776
2777 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002778 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002779 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002781 EXPAT(GetErrorColumnNumber)(self->parser),
2782 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 );
2784 return NULL;
2785 }
2786
2787 Py_RETURN_NONE;
2788}
2789
2790static PyObject*
2791xmlparser_close(XMLParserObject* self, PyObject* args)
2792{
2793 /* end feeding data to parser */
2794
2795 PyObject* res;
2796 if (!PyArg_ParseTuple(args, ":close"))
2797 return NULL;
2798
2799 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800 if (!res)
2801 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002803 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804 Py_DECREF(res);
2805 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002806 } if (self->handle_close) {
2807 Py_DECREF(res);
2808 return PyObject_CallFunction(self->handle_close, "");
2809 } else
2810 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811}
2812
2813static PyObject*
2814xmlparser_feed(XMLParserObject* self, PyObject* args)
2815{
2816 /* feed data to parser */
2817
2818 char* data;
2819 int data_len;
2820 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2821 return NULL;
2822
2823 return expat_parse(self, data, data_len, 0);
2824}
2825
2826static PyObject*
2827xmlparser_parse(XMLParserObject* self, PyObject* args)
2828{
2829 /* (internal) parse until end of input stream */
2830
2831 PyObject* reader;
2832 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02002833 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 PyObject* res;
2835
2836 PyObject* fileobj;
2837 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2838 return NULL;
2839
2840 reader = PyObject_GetAttrString(fileobj, "read");
2841 if (!reader)
2842 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002843
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002844 /* read from open file object */
2845 for (;;) {
2846
2847 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2848
2849 if (!buffer) {
2850 /* read failed (e.g. due to KeyboardInterrupt) */
2851 Py_DECREF(reader);
2852 return NULL;
2853 }
2854
Eli Benderskyf996e772012-03-16 05:53:30 +02002855 if (PyUnicode_CheckExact(buffer)) {
2856 /* A unicode object is encoded into bytes using UTF-8 */
2857 if (PyUnicode_GET_SIZE(buffer) == 0) {
2858 Py_DECREF(buffer);
2859 break;
2860 }
2861 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
2862 if (!temp) {
2863 /* Propagate exception from PyUnicode_AsEncodedString */
2864 Py_DECREF(buffer);
2865 Py_DECREF(reader);
2866 return NULL;
2867 }
2868
2869 /* Here we no longer need the original buffer since it contains
2870 * unicode. Make it point to the encoded bytes object.
2871 */
2872 Py_DECREF(buffer);
2873 buffer = temp;
2874 }
2875 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876 Py_DECREF(buffer);
2877 break;
2878 }
2879
2880 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00002881 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882 );
2883
2884 Py_DECREF(buffer);
2885
2886 if (!res) {
2887 Py_DECREF(reader);
2888 return NULL;
2889 }
2890 Py_DECREF(res);
2891
2892 }
2893
2894 Py_DECREF(reader);
2895
2896 res = expat_parse(self, "", 0, 1);
2897
2898 if (res && TreeBuilder_CheckExact(self->target)) {
2899 Py_DECREF(res);
2900 return treebuilder_done((TreeBuilderObject*) self->target);
2901 }
2902
2903 return res;
2904}
2905
2906static PyObject*
2907xmlparser_setevents(XMLParserObject* self, PyObject* args)
2908{
2909 /* activate element event reporting */
2910
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002911 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 TreeBuilderObject* target;
2913
2914 PyObject* events; /* event collector */
2915 PyObject* event_set = Py_None;
2916 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2917 &event_set))
2918 return NULL;
2919
2920 if (!TreeBuilder_CheckExact(self->target)) {
2921 PyErr_SetString(
2922 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002923 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 "targets"
2925 );
2926 return NULL;
2927 }
2928
2929 target = (TreeBuilderObject*) self->target;
2930
2931 Py_INCREF(events);
2932 Py_XDECREF(target->events);
2933 target->events = events;
2934
2935 /* clear out existing events */
2936 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2937 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2938 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2939 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2940
2941 if (event_set == Py_None) {
2942 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002943 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 Py_RETURN_NONE;
2945 }
2946
2947 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2948 goto error;
2949
2950 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2951 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2952 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002953 if (PyUnicode_Check(item)) {
2954 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00002955 if (event == NULL)
2956 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002957 } else if (PyBytes_Check(item))
2958 event = PyBytes_AS_STRING(item);
2959 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002961 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 if (strcmp(event, "start") == 0) {
2963 Py_INCREF(item);
2964 target->start_event_obj = item;
2965 } else if (strcmp(event, "end") == 0) {
2966 Py_INCREF(item);
2967 Py_XDECREF(target->end_event_obj);
2968 target->end_event_obj = item;
2969 } else if (strcmp(event, "start-ns") == 0) {
2970 Py_INCREF(item);
2971 Py_XDECREF(target->start_ns_event_obj);
2972 target->start_ns_event_obj = item;
2973 EXPAT(SetNamespaceDeclHandler)(
2974 self->parser,
2975 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2976 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2977 );
2978 } else if (strcmp(event, "end-ns") == 0) {
2979 Py_INCREF(item);
2980 Py_XDECREF(target->end_ns_event_obj);
2981 target->end_ns_event_obj = item;
2982 EXPAT(SetNamespaceDeclHandler)(
2983 self->parser,
2984 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2985 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2986 );
2987 } else {
2988 PyErr_Format(
2989 PyExc_ValueError,
2990 "unknown event '%s'", event
2991 );
2992 return NULL;
2993 }
2994 }
2995
2996 Py_RETURN_NONE;
2997
2998 error:
2999 PyErr_SetString(
3000 PyExc_TypeError,
3001 "invalid event tuple"
3002 );
3003 return NULL;
3004}
3005
3006static PyMethodDef xmlparser_methods[] = {
3007 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3008 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3009 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3010 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
3011 {NULL, NULL}
3012};
3013
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003014static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003015xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003017 if (PyUnicode_Check(nameobj)) {
3018 PyObject* res;
3019 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3020 res = self->entity;
3021 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3022 res = self->target;
3023 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3024 return PyUnicode_FromFormat(
3025 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003027 }
3028 else
3029 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003030
Alexander Belopolskye239d232010-12-08 23:31:48 +00003031 Py_INCREF(res);
3032 return res;
3033 }
3034 generic:
3035 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036}
3037
Neal Norwitz227b5332006-03-22 09:28:35 +00003038static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003039 PyVarObject_HEAD_INIT(NULL, 0)
3040 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 /* methods */
3042 (destructor)xmlparser_dealloc, /* tp_dealloc */
3043 0, /* tp_print */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003044 0, /* tp_getattr */
3045 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003046 0, /* tp_reserved */
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003047 0, /* tp_repr */
3048 0, /* tp_as_number */
3049 0, /* tp_as_sequence */
3050 0, /* tp_as_mapping */
3051 0, /* tp_hash */
3052 0, /* tp_call */
3053 0, /* tp_str */
3054 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3055 0, /* tp_setattro */
3056 0, /* tp_as_buffer */
3057 Py_TPFLAGS_DEFAULT, /* tp_flags */
3058 0, /* tp_doc */
3059 0, /* tp_traverse */
3060 0, /* tp_clear */
3061 0, /* tp_richcompare */
3062 0, /* tp_weaklistoffset */
3063 0, /* tp_iter */
3064 0, /* tp_iternext */
3065 xmlparser_methods, /* tp_methods */
3066 0, /* tp_members */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067};
3068
3069#endif
3070
3071/* ==================================================================== */
3072/* python module interface */
3073
3074static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
3076 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
3077#if defined(USE_EXPAT)
3078 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079#endif
3080 {NULL, NULL}
3081};
3082
Martin v. Löwis1a214512008-06-11 05:26:20 +00003083
3084static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003085 PyModuleDef_HEAD_INIT,
3086 "_elementtree",
3087 NULL,
3088 -1,
3089 _functions,
3090 NULL,
3091 NULL,
3092 NULL,
3093 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003094};
3095
Neal Norwitzf6657e62006-12-28 04:47:50 +00003096PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003097PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098{
Eli Bendersky828efde2012-04-05 05:40:58 +03003099 PyObject *m, *g, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100 char* bootstrap;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003102 /* Initialize object types */
3103 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003104 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003105 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003106 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003108 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003109 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110#endif
3111
Martin v. Löwis1a214512008-06-11 05:26:20 +00003112 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003113 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003114 return NULL;
3115
3116 /* The code below requires that the module gets already added
3117 to sys.modules. */
3118 PyDict_SetItemString(PyImport_GetModuleDict(),
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003119 _elementtreemodule.m_name,
3120 m);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121
3122 /* python glue code */
3123
3124 g = PyDict_New();
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003125 if (!g)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003126 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127
3128 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
3129
3130 bootstrap = (
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003131 "def iter(node, tag=None):\n" /* helper */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 " if tag == '*':\n"
3133 " tag = None\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 " if tag is None or node.tag == tag:\n"
3135 " yield node\n"
3136 " for node in node:\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003137 " for node in iter(node, tag):\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138 " yield node\n"
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003139
3140 "def itertext(node):\n" /* helper */
3141 " if node.text:\n"
3142 " yield node.text\n"
3143 " for e in node:\n"
3144 " for s in e.itertext():\n"
3145 " yield s\n"
3146 " if e.tail:\n"
3147 " yield e.tail\n"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149 );
3150
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003151 if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
3152 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153
Eli Bendersky828efde2012-04-05 05:40:58 +03003154 if (!(temp = PyImport_ImportModule("copy")))
3155 return NULL;
3156 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3157 Py_XDECREF(temp);
3158
3159 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3160 return NULL;
3161
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003162 elementtree_iter_obj = PyDict_GetItemString(g, "iter");
3163 elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164
3165#if defined(USE_PYEXPAT_CAPI)
3166 /* link against pyexpat, if possible */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003167 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3168 if (expat_capi) {
3169 /* check that it's usable */
3170 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3171 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3172 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3173 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
3174 expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
3175 expat_capi = NULL;
3176 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003177#endif
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003178
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003179 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003180 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003181 );
3182 Py_INCREF(elementtree_parseerror_obj);
3183 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3184
Eli Bendersky092af1f2012-03-04 07:14:03 +02003185 Py_INCREF((PyObject *)&Element_Type);
3186 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3187
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003188 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003189}