blob: 6415797f5690a6e7e384b98e2783c2335e7aa079 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Keep this defined to build the expat-based XMLParser type. */
#define USE_EXPAT

/* Number of child slots embedded directly in an element's extra section;
   up to this many children need no separate allocation. */
#define STATIC_CHILDREN 4

/* Tuning notes:

   - For best performance, choose a value so that 80-90% of all nodes
     have no more than this number of children.  Set it to zero to
     minimize the size of the element structure itself (this only helps
     if you have lots of leaf nodes with attributes).

   - pymalloc always allocates blocks in multiples of eight bytes.  For
     the current C version of ElementTree, this means that the number of
     children should be an even number, at least on 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  Change the "#if 0" below to "#if 1" to print
   a running byte total on every ALLOC/RELEASE; in the default build both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) introduces a file-local helper returning
   `type`; under MSVC it additionally asks for inlining and __fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* Tagged-pointer helpers.  The lowest bit of the text/tail pointers is
   used to store a 'join' flag, so every use of text or tail as an object
   pointer must go through JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)        strip the flag and return the PyObject pointer
   JOIN_GET(p)        return the flag (0 or 1)
   JOIN_SET(p, flag)  return the object pointer of p combined with flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
126 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
131 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
132
133 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
134
135 Py_DECREF(args);
136
137 return result;
138}
139
140LOCAL(PyObject*)
141list_join(PyObject* list)
142{
143 /* join list elements (destroying the list in the process) */
144
145 PyObject* joiner;
146 PyObject* function;
147 PyObject* args;
148 PyObject* result;
149
150 switch (PyList_GET_SIZE(list)) {
151 case 0:
152 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000153 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 case 1:
155 result = PyList_GET_ITEM(list, 0);
156 Py_INCREF(result);
157 Py_DECREF(list);
158 return result;
159 }
160
161 /* two or more elements: slice out a suitable separator from the
162 first member, and use that to join the entire list */
163
164 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
165 if (!joiner)
166 return NULL;
167
168 function = PyObject_GetAttrString(joiner, "join");
169 if (!function) {
170 Py_DECREF(joiner);
171 return NULL;
172 }
173
174 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000175 if (!args)
176 return NULL;
177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 PyTuple_SET_ITEM(args, 0, list);
179
180 result = PyObject_CallObject(function, args);
181
182 Py_DECREF(args); /* also removes list */
183 Py_DECREF(function);
184 Py_DECREF(joiner);
185
186 return result;
187}
188
Eli Bendersky48d358b2012-05-30 17:57:50 +0300189/* Is the given object an empty dictionary?
190*/
191static int
192is_empty_dict(PyObject *obj)
193{
194 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
195}
196
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200199/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201typedef struct {
202
203 /* attributes (a dictionary object), or None if no attributes */
204 PyObject* attrib;
205
206 /* child elements */
207 int length; /* actual number of items */
208 int allocated; /* allocated items */
209
210 /* this either points to _children or to a malloced buffer */
211 PyObject* *children;
212
213 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215} ElementObjectExtra;
216
217typedef struct {
218 PyObject_HEAD
219
220 /* element tag (a string). */
221 PyObject* tag;
222
223 /* text before first child. note that this is a tagged pointer;
224 use JOIN_OBJ to get the object pointer. the join flag is used
225 to distinguish lists created by the tree builder from lists
226 assigned to the attribute by application code; the former
227 should be joined before being returned to the user, the latter
228 should be left intact. */
229 PyObject* text;
230
231 /* text after this element, in parent. note that this is a tagged
232 pointer; use JOIN_OBJ to get the object pointer. */
233 PyObject* tail;
234
235 ElementObjectExtra* extra;
236
Eli Benderskyebf37a22012-04-03 22:02:37 +0300237 PyObject *weakreflist; /* For tp_weaklistoffset */
238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000239} ElementObject;
240
Neal Norwitz227b5332006-03-22 09:28:35 +0000241static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000242
Christian Heimes90aa7642007-12-19 02:45:37 +0000243#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
245/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200246/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
248LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200249create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250{
251 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
252 if (!self->extra)
253 return -1;
254
255 if (!attrib)
256 attrib = Py_None;
257
258 Py_INCREF(attrib);
259 self->extra->attrib = attrib;
260
261 self->extra->length = 0;
262 self->extra->allocated = STATIC_CHILDREN;
263 self->extra->children = self->extra->_children;
264
265 return 0;
266}
267
268LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270{
Eli Bendersky08b85292012-04-04 15:55:07 +0300271 ElementObjectExtra *myextra;
272 int i;
273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 if (!self->extra)
275 return;
276
277 /* Avoid DECREFs calling into this code again (cycles, etc.)
278 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300279 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 self->extra = NULL;
281
282 Py_DECREF(myextra->attrib);
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 for (i = 0; i < myextra->length; i++)
285 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286
Eli Benderskyebf37a22012-04-03 22:02:37 +0300287 if (myextra->children != myextra->_children)
288 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289
Eli Benderskyebf37a22012-04-03 22:02:37 +0300290 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291}
292
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293/* Convenience internal function to create new Element objects with the given
294 * tag and attributes.
295*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298{
299 ElementObject* self;
300
Eli Bendersky0192ba32012-03-30 16:38:33 +0300301 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000302 if (self == NULL)
303 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 self->extra = NULL;
305
Eli Bendersky48d358b2012-05-30 17:57:50 +0300306 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000308 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000310 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 }
312
313 Py_INCREF(tag);
314 self->tag = tag;
315
316 Py_INCREF(Py_None);
317 self->text = Py_None;
318
319 Py_INCREF(Py_None);
320 self->tail = Py_None;
321
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 self->weakreflist = NULL;
323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000324 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300325 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 return (PyObject*) self;
327}
328
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329static PyObject *
330element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
331{
332 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
333 if (e != NULL) {
334 Py_INCREF(Py_None);
335 e->tag = Py_None;
336
337 Py_INCREF(Py_None);
338 e->text = Py_None;
339
340 Py_INCREF(Py_None);
341 e->tail = Py_None;
342
343 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300344 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200345 }
346 return (PyObject *)e;
347}
348
Eli Bendersky737b1732012-05-29 06:02:56 +0300349/* Helper function for extracting the attrib dictionary from a keywords dict.
350 * This is required by some constructors/functions in this module that can
351 * either accept attrib as a keyword argument or all attributes splashed
352 * directly into *kwds.
353 * If there is no 'attrib' keyword, return an empty dict.
354 */
355static PyObject*
356get_attrib_from_keywords(PyObject *kwds)
357{
358 PyObject *attrib_str = PyUnicode_FromString("attrib");
359 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
360
361 if (attrib) {
362 /* If attrib was found in kwds, copy its value and remove it from
363 * kwds
364 */
365 if (!PyDict_Check(attrib)) {
366 Py_DECREF(attrib_str);
367 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
368 Py_TYPE(attrib)->tp_name);
369 return NULL;
370 }
371 attrib = PyDict_Copy(attrib);
372 PyDict_DelItem(kwds, attrib_str);
373 } else {
374 attrib = PyDict_New();
375 }
376
377 Py_DECREF(attrib_str);
378
379 if (attrib)
380 PyDict_Update(attrib, kwds);
381 return attrib;
382}
383
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384static int
385element_init(PyObject *self, PyObject *args, PyObject *kwds)
386{
387 PyObject *tag;
388 PyObject *tmp;
389 PyObject *attrib = NULL;
390 ElementObject *self_elem;
391
392 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
393 return -1;
394
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (attrib) {
396 /* attrib passed as positional arg */
397 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 if (!attrib)
399 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (kwds) {
401 if (PyDict_Update(attrib, kwds) < 0) {
402 return -1;
403 }
404 }
405 } else if (kwds) {
406 /* have keywords args */
407 attrib = get_attrib_from_keywords(kwds);
408 if (!attrib)
409 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300411 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_INCREF(Py_None);
413 attrib = Py_None;
414 }
415
416 self_elem = (ElementObject *)self;
417
Eli Bendersky48d358b2012-05-30 17:57:50 +0300418 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 if (create_extra(self_elem, attrib) < 0) {
420 PyObject_Del(self_elem);
421 return -1;
422 }
423 }
424
Eli Bendersky48d358b2012-05-30 17:57:50 +0300425 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(attrib);
427
428 /* Replace the objects already pointed to by tag, text and tail. */
429 tmp = self_elem->tag;
430 self_elem->tag = tag;
431 Py_INCREF(tag);
432 Py_DECREF(tmp);
433
434 tmp = self_elem->text;
435 self_elem->text = Py_None;
436 Py_INCREF(Py_None);
437 Py_DECREF(JOIN_OBJ(tmp));
438
439 tmp = self_elem->tail;
440 self_elem->tail = Py_None;
441 Py_INCREF(Py_None);
442 Py_DECREF(JOIN_OBJ(tmp));
443
444 return 0;
445}
446
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447LOCAL(int)
448element_resize(ElementObject* self, int extra)
449{
450 int size;
451 PyObject* *children;
452
453 /* make sure self->children can hold the given number of extra
454 elements. set an exception and return -1 if allocation failed */
455
456 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200457 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000458
459 size = self->extra->length + extra;
460
461 if (size > self->extra->allocated) {
462 /* use Python 2.4's list growth strategy */
463 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100465 * which needs at least 4 bytes.
466 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000467 * be safe.
468 */
469 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000470 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100472 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 * false alarm always assume at least one child to be safe.
474 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000475 children = PyObject_Realloc(self->extra->children,
476 size * sizeof(PyObject*));
477 if (!children)
478 goto nomemory;
479 } else {
480 children = PyObject_Malloc(size * sizeof(PyObject*));
481 if (!children)
482 goto nomemory;
483 /* copy existing children from static area to malloc buffer */
484 memcpy(children, self->extra->children,
485 self->extra->length * sizeof(PyObject*));
486 }
487 self->extra->children = children;
488 self->extra->allocated = size;
489 }
490
491 return 0;
492
493 nomemory:
494 PyErr_NoMemory();
495 return -1;
496}
497
498LOCAL(int)
499element_add_subelement(ElementObject* self, PyObject* element)
500{
501 /* add a child element to a parent */
502
503 if (element_resize(self, 1) < 0)
504 return -1;
505
506 Py_INCREF(element);
507 self->extra->children[self->extra->length] = element;
508
509 self->extra->length++;
510
511 return 0;
512}
513
514LOCAL(PyObject*)
515element_get_attrib(ElementObject* self)
516{
517 /* return borrowed reference to attrib dictionary */
518 /* note: this function assumes that the extra section exists */
519
520 PyObject* res = self->extra->attrib;
521
522 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000523 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524 /* create missing dictionary */
525 res = PyDict_New();
526 if (!res)
527 return NULL;
528 self->extra->attrib = res;
529 }
530
531 return res;
532}
533
534LOCAL(PyObject*)
535element_get_text(ElementObject* self)
536{
537 /* return borrowed reference to text attribute */
538
539 PyObject* res = self->text;
540
541 if (JOIN_GET(res)) {
542 res = JOIN_OBJ(res);
543 if (PyList_CheckExact(res)) {
544 res = list_join(res);
545 if (!res)
546 return NULL;
547 self->text = res;
548 }
549 }
550
551 return res;
552}
553
554LOCAL(PyObject*)
555element_get_tail(ElementObject* self)
556{
557 /* return borrowed reference to text attribute */
558
559 PyObject* res = self->tail;
560
561 if (JOIN_GET(res)) {
562 res = JOIN_OBJ(res);
563 if (PyList_CheckExact(res)) {
564 res = list_join(res);
565 if (!res)
566 return NULL;
567 self->tail = res;
568 }
569 }
570
571 return res;
572}
573
574static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300575subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576{
577 PyObject* elem;
578
579 ElementObject* parent;
580 PyObject* tag;
581 PyObject* attrib = NULL;
582 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
583 &Element_Type, &parent, &tag,
584 &PyDict_Type, &attrib))
585 return NULL;
586
Eli Bendersky737b1732012-05-29 06:02:56 +0300587 if (attrib) {
588 /* attrib passed as positional arg */
589 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 if (!attrib)
591 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300592 if (kwds) {
593 if (PyDict_Update(attrib, kwds) < 0) {
594 return NULL;
595 }
596 }
597 } else if (kwds) {
598 /* have keyword args */
599 attrib = get_attrib_from_keywords(kwds);
600 if (!attrib)
601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 Py_INCREF(Py_None);
605 attrib = Py_None;
606 }
607
Eli Bendersky092af1f2012-03-04 07:14:03 +0200608 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609
610 Py_DECREF(attrib);
611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
628 int i;
629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300641
642 /* The following is like Py_CLEAR for self->text and self->tail, but
643 * written explicitily because the real pointers hide behind access
644 * macros.
645 */
646 if (self->text) {
647 PyObject *tmp = JOIN_OBJ(self->text);
648 self->text = NULL;
649 Py_DECREF(tmp);
650 }
651
652 if (self->tail) {
653 PyObject *tmp = JOIN_OBJ(self->tail);
654 self->tail = NULL;
655 Py_DECREF(tmp);
656 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657
658 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300659 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300660 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 return 0;
663}
664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665static void
666element_dealloc(ElementObject* self)
667{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300669
670 if (self->weakreflist != NULL)
671 PyObject_ClearWeakRefs((PyObject *) self);
672
Eli Bendersky0192ba32012-03-30 16:38:33 +0300673 /* element_gc_clear clears all references and deallocates extra
674 */
675 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676
677 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200678 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679}
680
681/* -------------------------------------------------------------------- */
682/* methods (in alphabetical order) */
683
684static PyObject*
685element_append(ElementObject* self, PyObject* args)
686{
687 PyObject* element;
688 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
689 return NULL;
690
691 if (element_add_subelement(self, element) < 0)
692 return NULL;
693
694 Py_RETURN_NONE;
695}
696
697static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300698element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699{
700 if (!PyArg_ParseTuple(args, ":clear"))
701 return NULL;
702
Eli Benderskyebf37a22012-04-03 22:02:37 +0300703 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 Py_INCREF(Py_None);
706 Py_DECREF(JOIN_OBJ(self->text));
707 self->text = Py_None;
708
709 Py_INCREF(Py_None);
710 Py_DECREF(JOIN_OBJ(self->tail));
711 self->tail = Py_None;
712
713 Py_RETURN_NONE;
714}
715
716static PyObject*
717element_copy(ElementObject* self, PyObject* args)
718{
719 int i;
720 ElementObject* element;
721
722 if (!PyArg_ParseTuple(args, ":__copy__"))
723 return NULL;
724
Eli Bendersky092af1f2012-03-04 07:14:03 +0200725 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726 self->tag, (self->extra) ? self->extra->attrib : Py_None
727 );
728 if (!element)
729 return NULL;
730
731 Py_DECREF(JOIN_OBJ(element->text));
732 element->text = self->text;
733 Py_INCREF(JOIN_OBJ(element->text));
734
735 Py_DECREF(JOIN_OBJ(element->tail));
736 element->tail = self->tail;
737 Py_INCREF(JOIN_OBJ(element->tail));
738
739 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100740
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000741 if (element_resize(element, self->extra->length) < 0) {
742 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
746 for (i = 0; i < self->extra->length; i++) {
747 Py_INCREF(self->extra->children[i]);
748 element->extra->children[i] = self->extra->children[i];
749 }
750
751 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 }
754
755 return (PyObject*) element;
756}
757
758static PyObject*
759element_deepcopy(ElementObject* self, PyObject* args)
760{
761 int i;
762 ElementObject* element;
763 PyObject* tag;
764 PyObject* attrib;
765 PyObject* text;
766 PyObject* tail;
767 PyObject* id;
768
769 PyObject* memo;
770 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
771 return NULL;
772
773 tag = deepcopy(self->tag, memo);
774 if (!tag)
775 return NULL;
776
777 if (self->extra) {
778 attrib = deepcopy(self->extra->attrib, memo);
779 if (!attrib) {
780 Py_DECREF(tag);
781 return NULL;
782 }
783 } else {
784 Py_INCREF(Py_None);
785 attrib = Py_None;
786 }
787
Eli Bendersky092af1f2012-03-04 07:14:03 +0200788 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 Py_DECREF(tag);
791 Py_DECREF(attrib);
792
793 if (!element)
794 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 text = deepcopy(JOIN_OBJ(self->text), memo);
797 if (!text)
798 goto error;
799 Py_DECREF(element->text);
800 element->text = JOIN_SET(text, JOIN_GET(self->text));
801
802 tail = deepcopy(JOIN_OBJ(self->tail), memo);
803 if (!tail)
804 goto error;
805 Py_DECREF(element->tail);
806 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
807
808 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 if (element_resize(element, self->extra->length) < 0)
811 goto error;
812
813 for (i = 0; i < self->extra->length; i++) {
814 PyObject* child = deepcopy(self->extra->children[i], memo);
815 if (!child) {
816 element->extra->length = i;
817 goto error;
818 }
819 element->extra->children[i] = child;
820 }
821
822 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000827 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000828 if (!id)
829 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830
831 i = PyDict_SetItem(memo, id, (PyObject*) element);
832
833 Py_DECREF(id);
834
835 if (i < 0)
836 goto error;
837
838 return (PyObject*) element;
839
840 error:
841 Py_DECREF(element);
842 return NULL;
843}
844
Martin v. Löwisbce16662012-06-17 10:41:22 +0200845static PyObject*
846element_sizeof(PyObject* _self, PyObject* args)
847{
848 ElementObject *self = (ElementObject*)_self;
849 Py_ssize_t result = sizeof(ElementObject);
850 if (self->extra) {
851 result += sizeof(ElementObjectExtra);
852 if (self->extra->children != self->extra->_children)
853 result += sizeof(PyObject*) * self->extra->allocated;
854 }
855 return PyLong_FromSsize_t(result);
856}
857
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000858LOCAL(int)
859checkpath(PyObject* tag)
860{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000861 Py_ssize_t i;
862 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863
864 /* check if a tag contains an xpath character */
865
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000866#define PATHCHAR(ch) \
867 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000868
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000869 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200870 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
871 void *data = PyUnicode_DATA(tag);
872 unsigned int kind = PyUnicode_KIND(tag);
873 for (i = 0; i < len; i++) {
874 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
875 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000876 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200877 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000878 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200879 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000880 return 1;
881 }
882 return 0;
883 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000884 if (PyBytes_Check(tag)) {
885 char *p = PyBytes_AS_STRING(tag);
886 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000887 if (p[i] == '{')
888 check = 0;
889 else if (p[i] == '}')
890 check = 1;
891 else if (check && PATHCHAR(p[i]))
892 return 1;
893 }
894 return 0;
895 }
896
897 return 1; /* unknown type; might be path expression */
898}
899
900static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000901element_extend(ElementObject* self, PyObject* args)
902{
903 PyObject* seq;
904 Py_ssize_t i, seqlen = 0;
905
906 PyObject* seq_in;
907 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
908 return NULL;
909
910 seq = PySequence_Fast(seq_in, "");
911 if (!seq) {
912 PyErr_Format(
913 PyExc_TypeError,
914 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
915 );
916 return NULL;
917 }
918
919 seqlen = PySequence_Size(seq);
920 for (i = 0; i < seqlen; i++) {
921 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200922 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
923 Py_DECREF(seq);
924 PyErr_Format(
925 PyExc_TypeError,
926 "expected an Element, not \"%.200s\"",
927 Py_TYPE(element)->tp_name);
928 return NULL;
929 }
930
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000931 if (element_add_subelement(self, element) < 0) {
932 Py_DECREF(seq);
933 return NULL;
934 }
935 }
936
937 Py_DECREF(seq);
938
939 Py_RETURN_NONE;
940}
941
942static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300943element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000944{
945 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000946 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000947 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300948 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200949
Eli Bendersky737b1732012-05-29 06:02:56 +0300950 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
951 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000952 return NULL;
953
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200954 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200955 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200956 return _PyObject_CallMethodId(
957 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000958 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200959 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000960
961 if (!self->extra)
962 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100963
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000964 for (i = 0; i < self->extra->length; i++) {
965 PyObject* item = self->extra->children[i];
966 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000967 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000968 Py_INCREF(item);
969 return item;
970 }
971 }
972
973 Py_RETURN_NONE;
974}
975
976static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300977element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000978{
979 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 PyObject* tag;
981 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000982 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200983 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300984 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200985
Eli Bendersky737b1732012-05-29 06:02:56 +0300986 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
987 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000988 return NULL;
989
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000990 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200991 return _PyObject_CallMethodId(
992 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000993 );
994
995 if (!self->extra) {
996 Py_INCREF(default_value);
997 return default_value;
998 }
999
1000 for (i = 0; i < self->extra->length; i++) {
1001 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +00001002 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
1003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001004 PyObject* text = element_get_text(item);
1005 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +00001006 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001007 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 return text;
1009 }
1010 }
1011
1012 Py_INCREF(default_value);
1013 return default_value;
1014}
1015
1016static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001017element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018{
1019 int i;
1020 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001021 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001022 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001023 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001024
Eli Bendersky737b1732012-05-29 06:02:56 +03001025 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1026 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001027 return NULL;
1028
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001029 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001030 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001031 return _PyObject_CallMethodId(
1032 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001033 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001034 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001035
1036 out = PyList_New(0);
1037 if (!out)
1038 return NULL;
1039
1040 if (!self->extra)
1041 return out;
1042
1043 for (i = 0; i < self->extra->length; i++) {
1044 PyObject* item = self->extra->children[i];
1045 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001046 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047 if (PyList_Append(out, item) < 0) {
1048 Py_DECREF(out);
1049 return NULL;
1050 }
1051 }
1052 }
1053
1054 return out;
1055}
1056
1057static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001058element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001059{
1060 PyObject* tag;
1061 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001062 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001063 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001064
Eli Bendersky737b1732012-05-29 06:02:56 +03001065 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1066 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001067 return NULL;
1068
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001069 return _PyObject_CallMethodId(
1070 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001071 );
1072}
1073
1074static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075element_get(ElementObject* self, PyObject* args)
1076{
1077 PyObject* value;
1078
1079 PyObject* key;
1080 PyObject* default_value = Py_None;
1081 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1082 return NULL;
1083
1084 if (!self->extra || self->extra->attrib == Py_None)
1085 value = default_value;
1086 else {
1087 value = PyDict_GetItem(self->extra->attrib, key);
1088 if (!value)
1089 value = default_value;
1090 }
1091
1092 Py_INCREF(value);
1093 return value;
1094}
1095
1096static PyObject*
1097element_getchildren(ElementObject* self, PyObject* args)
1098{
1099 int i;
1100 PyObject* list;
1101
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001102 /* FIXME: report as deprecated? */
1103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001104 if (!PyArg_ParseTuple(args, ":getchildren"))
1105 return NULL;
1106
1107 if (!self->extra)
1108 return PyList_New(0);
1109
1110 list = PyList_New(self->extra->length);
1111 if (!list)
1112 return NULL;
1113
1114 for (i = 0; i < self->extra->length; i++) {
1115 PyObject* item = self->extra->children[i];
1116 Py_INCREF(item);
1117 PyList_SET_ITEM(list, i, item);
1118 }
1119
1120 return list;
1121}
1122
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001123
/* Forward declaration; the definition is not in this part of the file.
 * In the callers here, element_iter() passes the sought tag with
 * gettext == 0 and element_itertext() passes Py_None with gettext == 1.
 */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1126
1127
1128static PyObject *
1129element_iter(ElementObject *self, PyObject *args)
1130{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001131 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001132 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 return NULL;
1134
Eli Bendersky64d11e62012-06-15 07:42:50 +03001135 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136}
1137
1138
1139static PyObject*
1140element_itertext(ElementObject* self, PyObject* args)
1141{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001142 if (!PyArg_ParseTuple(args, ":itertext"))
1143 return NULL;
1144
Eli Bendersky64d11e62012-06-15 07:42:50 +03001145 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146}
1147
Eli Bendersky64d11e62012-06-15 07:42:50 +03001148
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001150element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001152 ElementObject* self = (ElementObject*) self_;
1153
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 if (!self->extra || index < 0 || index >= self->extra->length) {
1155 PyErr_SetString(
1156 PyExc_IndexError,
1157 "child index out of range"
1158 );
1159 return NULL;
1160 }
1161
1162 Py_INCREF(self->extra->children[index]);
1163 return self->extra->children[index];
1164}
1165
1166static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167element_insert(ElementObject* self, PyObject* args)
1168{
1169 int i;
1170
1171 int index;
1172 PyObject* element;
1173 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1174 &Element_Type, &element))
1175 return NULL;
1176
1177 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001178 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001180 if (index < 0) {
1181 index += self->extra->length;
1182 if (index < 0)
1183 index = 0;
1184 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001185 if (index > self->extra->length)
1186 index = self->extra->length;
1187
1188 if (element_resize(self, 1) < 0)
1189 return NULL;
1190
1191 for (i = self->extra->length; i > index; i--)
1192 self->extra->children[i] = self->extra->children[i-1];
1193
1194 Py_INCREF(element);
1195 self->extra->children[index] = element;
1196
1197 self->extra->length++;
1198
1199 Py_RETURN_NONE;
1200}
1201
1202static PyObject*
1203element_items(ElementObject* self, PyObject* args)
1204{
1205 if (!PyArg_ParseTuple(args, ":items"))
1206 return NULL;
1207
1208 if (!self->extra || self->extra->attrib == Py_None)
1209 return PyList_New(0);
1210
1211 return PyDict_Items(self->extra->attrib);
1212}
1213
1214static PyObject*
1215element_keys(ElementObject* self, PyObject* args)
1216{
1217 if (!PyArg_ParseTuple(args, ":keys"))
1218 return NULL;
1219
1220 if (!self->extra || self->extra->attrib == Py_None)
1221 return PyList_New(0);
1222
1223 return PyDict_Keys(self->extra->attrib);
1224}
1225
Martin v. Löwis18e16552006-02-15 17:27:45 +00001226static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227element_length(ElementObject* self)
1228{
1229 if (!self->extra)
1230 return 0;
1231
1232 return self->extra->length;
1233}
1234
1235static PyObject*
1236element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1237{
1238 PyObject* elem;
1239
1240 PyObject* tag;
1241 PyObject* attrib;
1242 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1243 return NULL;
1244
1245 attrib = PyDict_Copy(attrib);
1246 if (!attrib)
1247 return NULL;
1248
Eli Bendersky092af1f2012-03-04 07:14:03 +02001249 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250
1251 Py_DECREF(attrib);
1252
1253 return elem;
1254}
1255
1256static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001257element_remove(ElementObject* self, PyObject* args)
1258{
1259 int i;
1260
1261 PyObject* element;
1262 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1263 return NULL;
1264
1265 if (!self->extra) {
1266 /* element has no children, so raise exception */
1267 PyErr_SetString(
1268 PyExc_ValueError,
1269 "list.remove(x): x not in list"
1270 );
1271 return NULL;
1272 }
1273
1274 for (i = 0; i < self->extra->length; i++) {
1275 if (self->extra->children[i] == element)
1276 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001277 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 break;
1279 }
1280
1281 if (i == self->extra->length) {
1282 /* element is not in children, so raise exception */
1283 PyErr_SetString(
1284 PyExc_ValueError,
1285 "list.remove(x): x not in list"
1286 );
1287 return NULL;
1288 }
1289
1290 Py_DECREF(self->extra->children[i]);
1291
1292 self->extra->length--;
1293
1294 for (; i < self->extra->length; i++)
1295 self->extra->children[i] = self->extra->children[i+1];
1296
1297 Py_RETURN_NONE;
1298}
1299
1300static PyObject*
1301element_repr(ElementObject* self)
1302{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001303 if (self->tag)
1304 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1305 else
1306 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001307}
1308
1309static PyObject*
1310element_set(ElementObject* self, PyObject* args)
1311{
1312 PyObject* attrib;
1313
1314 PyObject* key;
1315 PyObject* value;
1316 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1317 return NULL;
1318
1319 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001320 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321
1322 attrib = element_get_attrib(self);
1323 if (!attrib)
1324 return NULL;
1325
1326 if (PyDict_SetItem(attrib, key, value) < 0)
1327 return NULL;
1328
1329 Py_RETURN_NONE;
1330}
1331
1332static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001333element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001335 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336 int i;
1337 PyObject* old;
1338
1339 if (!self->extra || index < 0 || index >= self->extra->length) {
1340 PyErr_SetString(
1341 PyExc_IndexError,
1342 "child assignment index out of range");
1343 return -1;
1344 }
1345
1346 old = self->extra->children[index];
1347
1348 if (item) {
1349 Py_INCREF(item);
1350 self->extra->children[index] = item;
1351 } else {
1352 self->extra->length--;
1353 for (i = index; i < self->extra->length; i++)
1354 self->extra->children[i] = self->extra->children[i+1];
1355 }
1356
1357 Py_DECREF(old);
1358
1359 return 0;
1360}
1361
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362static PyObject*
1363element_subscr(PyObject* self_, PyObject* item)
1364{
1365 ElementObject* self = (ElementObject*) self_;
1366
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001367 if (PyIndex_Check(item)) {
1368 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001369
1370 if (i == -1 && PyErr_Occurred()) {
1371 return NULL;
1372 }
1373 if (i < 0 && self->extra)
1374 i += self->extra->length;
1375 return element_getitem(self_, i);
1376 }
1377 else if (PySlice_Check(item)) {
1378 Py_ssize_t start, stop, step, slicelen, cur, i;
1379 PyObject* list;
1380
1381 if (!self->extra)
1382 return PyList_New(0);
1383
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001384 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001385 self->extra->length,
1386 &start, &stop, &step, &slicelen) < 0) {
1387 return NULL;
1388 }
1389
1390 if (slicelen <= 0)
1391 return PyList_New(0);
1392 else {
1393 list = PyList_New(slicelen);
1394 if (!list)
1395 return NULL;
1396
1397 for (cur = start, i = 0; i < slicelen;
1398 cur += step, i++) {
1399 PyObject* item = self->extra->children[cur];
1400 Py_INCREF(item);
1401 PyList_SET_ITEM(list, i, item);
1402 }
1403
1404 return list;
1405 }
1406 }
1407 else {
1408 PyErr_SetString(PyExc_TypeError,
1409 "element indices must be integers");
1410 return NULL;
1411 }
1412}
1413
1414static int
1415element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1416{
1417 ElementObject* self = (ElementObject*) self_;
1418
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419 if (PyIndex_Check(item)) {
1420 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001421
1422 if (i == -1 && PyErr_Occurred()) {
1423 return -1;
1424 }
1425 if (i < 0 && self->extra)
1426 i += self->extra->length;
1427 return element_setitem(self_, i, value);
1428 }
1429 else if (PySlice_Check(item)) {
1430 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1431
1432 PyObject* recycle = NULL;
1433 PyObject* seq = NULL;
1434
1435 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001436 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001437
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001438 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001439 self->extra->length,
1440 &start, &stop, &step, &slicelen) < 0) {
1441 return -1;
1442 }
1443
Eli Bendersky865756a2012-03-09 13:38:15 +02001444 if (value == NULL) {
1445 /* Delete slice */
1446 size_t cur;
1447 Py_ssize_t i;
1448
1449 if (slicelen <= 0)
1450 return 0;
1451
1452 /* Since we're deleting, the direction of the range doesn't matter,
1453 * so for simplicity make it always ascending.
1454 */
1455 if (step < 0) {
1456 stop = start + 1;
1457 start = stop + step * (slicelen - 1) - 1;
1458 step = -step;
1459 }
1460
1461 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1462
1463 /* recycle is a list that will contain all the children
1464 * scheduled for removal.
1465 */
1466 if (!(recycle = PyList_New(slicelen))) {
1467 PyErr_NoMemory();
1468 return -1;
1469 }
1470
1471 /* This loop walks over all the children that have to be deleted,
1472 * with cur pointing at them. num_moved is the amount of children
1473 * until the next deleted child that have to be "shifted down" to
1474 * occupy the deleted's places.
1475 * Note that in the ith iteration, shifting is done i+i places down
1476 * because i children were already removed.
1477 */
1478 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1479 /* Compute how many children have to be moved, clipping at the
1480 * list end.
1481 */
1482 Py_ssize_t num_moved = step - 1;
1483 if (cur + step >= (size_t)self->extra->length) {
1484 num_moved = self->extra->length - cur - 1;
1485 }
1486
1487 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1488
1489 memmove(
1490 self->extra->children + cur - i,
1491 self->extra->children + cur + 1,
1492 num_moved * sizeof(PyObject *));
1493 }
1494
1495 /* Leftover "tail" after the last removed child */
1496 cur = start + (size_t)slicelen * step;
1497 if (cur < (size_t)self->extra->length) {
1498 memmove(
1499 self->extra->children + cur - slicelen,
1500 self->extra->children + cur,
1501 (self->extra->length - cur) * sizeof(PyObject *));
1502 }
1503
1504 self->extra->length -= slicelen;
1505
1506 /* Discard the recycle list with all the deleted sub-elements */
1507 Py_XDECREF(recycle);
1508 return 0;
1509 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001510 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001511 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001512 seq = PySequence_Fast(value, "");
1513 if (!seq) {
1514 PyErr_Format(
1515 PyExc_TypeError,
1516 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1517 );
1518 return -1;
1519 }
1520 newlen = PySequence_Size(seq);
1521 }
1522
1523 if (step != 1 && newlen != slicelen)
1524 {
1525 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001526 "attempt to assign sequence of size %zd "
1527 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001528 newlen, slicelen
1529 );
1530 return -1;
1531 }
1532
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001533 /* Resize before creating the recycle bin, to prevent refleaks. */
1534 if (newlen > slicelen) {
1535 if (element_resize(self, newlen - slicelen) < 0) {
1536 if (seq) {
1537 Py_DECREF(seq);
1538 }
1539 return -1;
1540 }
1541 }
1542
1543 if (slicelen > 0) {
1544 /* to avoid recursive calls to this method (via decref), move
1545 old items to the recycle bin here, and get rid of them when
1546 we're done modifying the element */
1547 recycle = PyList_New(slicelen);
1548 if (!recycle) {
1549 if (seq) {
1550 Py_DECREF(seq);
1551 }
1552 return -1;
1553 }
1554 for (cur = start, i = 0; i < slicelen;
1555 cur += step, i++)
1556 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1557 }
1558
1559 if (newlen < slicelen) {
1560 /* delete slice */
1561 for (i = stop; i < self->extra->length; i++)
1562 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1563 } else if (newlen > slicelen) {
1564 /* insert slice */
1565 for (i = self->extra->length-1; i >= stop; i--)
1566 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1567 }
1568
1569 /* replace the slice */
1570 for (cur = start, i = 0; i < newlen;
1571 cur += step, i++) {
1572 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1573 Py_INCREF(element);
1574 self->extra->children[cur] = element;
1575 }
1576
1577 self->extra->length += newlen - slicelen;
1578
1579 if (seq) {
1580 Py_DECREF(seq);
1581 }
1582
1583 /* discard the recycle bin, and everything in it */
1584 Py_XDECREF(recycle);
1585
1586 return 0;
1587 }
1588 else {
1589 PyErr_SetString(PyExc_TypeError,
1590 "element indices must be integers");
1591 return -1;
1592 }
1593}
1594
/* Method table for Element objects (installed as tp_methods of
 * Element_Type). The find* family and iterfind accept keyword arguments;
 * __sizeof__ takes no arguments; every other entry is positional-only.
 */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},

    /* attribute access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* searching */
    {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},

    /* child list manipulation */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"extend", (PyCFunction) element_extend, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    /* iteration */
    {"iter", (PyCFunction) element_iter, METH_VARARGS},
    {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
    {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},

    /* getiterator shares its implementation with iter */
    {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    /* copy / introspection protocol */
    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
    {"__sizeof__", element_sizeof, METH_NOARGS},

    {NULL, NULL} /* sentinel */
};
1629
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001630static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001631element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632{
1633 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001634 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001636 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001637 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001638
Alexander Belopolskye239d232010-12-08 23:31:48 +00001639 if (name == NULL)
1640 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001642 /* handle common attributes first */
1643 if (strcmp(name, "tag") == 0) {
1644 res = self->tag;
1645 Py_INCREF(res);
1646 return res;
1647 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001649 Py_INCREF(res);
1650 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651 }
1652
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001653 /* methods */
1654 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1655 if (res)
1656 return res;
1657
1658 /* less common attributes */
1659 if (strcmp(name, "tail") == 0) {
1660 PyErr_Clear();
1661 res = element_get_tail(self);
1662 } else if (strcmp(name, "attrib") == 0) {
1663 PyErr_Clear();
1664 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001665 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001666 res = element_get_attrib(self);
1667 }
1668
1669 if (!res)
1670 return NULL;
1671
1672 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 return res;
1674}
1675
Eli Benderskyb20df952012-05-20 06:33:29 +03001676static PyObject*
1677element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678{
Eli Benderskyb20df952012-05-20 06:33:29 +03001679 char *name = "";
1680 if (PyUnicode_Check(nameobj))
1681 name = _PyUnicode_AsString(nameobj);
1682
1683 if (name == NULL)
1684 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685
1686 if (strcmp(name, "tag") == 0) {
1687 Py_DECREF(self->tag);
1688 self->tag = value;
1689 Py_INCREF(self->tag);
1690 } else if (strcmp(name, "text") == 0) {
1691 Py_DECREF(JOIN_OBJ(self->text));
1692 self->text = value;
1693 Py_INCREF(self->text);
1694 } else if (strcmp(name, "tail") == 0) {
1695 Py_DECREF(JOIN_OBJ(self->tail));
1696 self->tail = value;
1697 Py_INCREF(self->tail);
1698 } else if (strcmp(name, "attrib") == 0) {
1699 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001700 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001701 Py_DECREF(self->extra->attrib);
1702 self->extra->attrib = value;
1703 Py_INCREF(self->extra->attrib);
1704 } else {
1705 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001706 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707 }
1708
Eli Benderskyb20df952012-05-20 06:33:29 +03001709 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710}
1711
/* Sequence protocol for Element: length, item get and item set/delete. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* unused slot (formerly sq_slice) */
    element_setitem, /* sq_ass_item */
    0, /* unused slot (formerly sq_ass_slice) */
};
1721
/* Mapping protocol for Element; element_subscr and element_ass_subscr
 * handle both integer indices and slices.
 */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length, /* mp_length */
    (binaryfunc) element_subscr, /* mp_subscript */
    (objobjargproc) element_ass_subscr, /* mp_ass_subscript */
};
1727
/* Type object for Element: a GC-tracked, subclassable type that supports
 * weak references and wires in the sequence/mapping tables and the
 * attribute hooks defined above.
 */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)element_getattro,                 /* tp_getattro */
    (setattrofunc)element_setattro,                 /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
1769
Eli Bendersky64d11e62012-06-15 07:42:50 +03001770/******************************* Element iterator ****************************/
1771
1772/* ElementIterObject represents the iteration state over an XML element in
1773 * pre-order traversal. To keep track of which sub-element should be returned
1774 * next, a stack of parents is maintained. This is a standard stack-based
1775 * iterative pre-order traversal of a tree.
1776 * The stack is managed using a single-linked list starting at parent_stack.
1777 * Each stack node contains the saved parent to which we should return after
1778 * the current one is exhausted, and the next child to examine in that parent.
1779 */
/* One node of the iterator's parent stack (a singly linked list). */
typedef struct ParentLocator_t {
    ElementObject *parent;          /* saved parent to return to */
    Py_ssize_t child_index;         /* next child to examine in that parent */
    struct ParentLocator_t *next;   /* node below this one on the stack */
} ParentLocator;
1785
/* Iterator state shared by Element.iter() and Element.itertext(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* head of the linked parent stack */
    ElementObject *root_element;    /* element the iteration started from */
    PyObject *sought_tag;           /* tag to match; Py_None matches any */
    int root_done;                  /* set once the root has been handled */
    int gettext;                    /* nonzero: yield text instead of elements */
} ElementIterObject;
1794
1795
1796static void
1797elementiter_dealloc(ElementIterObject *it)
1798{
1799 ParentLocator *p = it->parent_stack;
1800 while (p) {
1801 ParentLocator *temp = p;
1802 Py_XDECREF(p->parent);
1803 p = p->next;
1804 PyObject_Free(temp);
1805 }
1806
1807 Py_XDECREF(it->sought_tag);
1808 Py_XDECREF(it->root_element);
1809
1810 PyObject_GC_UnTrack(it);
1811 PyObject_GC_Del(it);
1812}
1813
1814static int
1815elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1816{
1817 ParentLocator *p = it->parent_stack;
1818 while (p) {
1819 Py_VISIT(p->parent);
1820 p = p->next;
1821 }
1822
1823 Py_VISIT(it->root_element);
1824 Py_VISIT(it->sought_tag);
1825 return 0;
1826}
1827
1828/* Helper function for elementiter_next. Add a new parent to the parent stack.
1829 */
1830static ParentLocator *
1831parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1832{
1833 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1834 if (new_node) {
1835 new_node->parent = parent;
1836 Py_INCREF(parent);
1837 new_node->child_index = 0;
1838 new_node->next = stack;
1839 }
1840 return new_node;
1841}
1842
1843static PyObject *
1844elementiter_next(ElementIterObject *it)
1845{
1846 /* Sub-element iterator.
1847 *
1848 * A short note on gettext: this function serves both the iter() and
1849 * itertext() methods to avoid code duplication. However, there are a few
1850 * small differences in the way these iterations work. Namely:
1851 * - itertext() only yields text from nodes that have it, and continues
1852 * iterating when a node doesn't have text (so it doesn't return any
1853 * node like iter())
1854 * - itertext() also has to handle tail, after finishing with all the
1855 * children of a node.
1856 */
Eli Bendersky113da642012-06-15 07:52:49 +03001857 ElementObject *cur_parent;
1858 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001859
1860 while (1) {
1861 /* Handle the case reached in the beginning and end of iteration, where
1862 * the parent stack is empty. The root_done flag gives us indication
1863 * whether we've just started iterating (so root_done is 0), in which
1864 * case the root is returned. If root_done is 1 and we're here, the
1865 * iterator is exhausted.
1866 */
1867 if (!it->parent_stack->parent) {
1868 if (it->root_done) {
1869 PyErr_SetNone(PyExc_StopIteration);
1870 return NULL;
1871 } else {
1872 it->parent_stack = parent_stack_push_new(it->parent_stack,
1873 it->root_element);
1874 if (!it->parent_stack) {
1875 PyErr_NoMemory();
1876 return NULL;
1877 }
1878
1879 it->root_done = 1;
1880 if (it->sought_tag == Py_None ||
1881 PyObject_RichCompareBool(it->root_element->tag,
1882 it->sought_tag, Py_EQ) == 1) {
1883 if (it->gettext) {
1884 PyObject *text = JOIN_OBJ(it->root_element->text);
1885 if (PyObject_IsTrue(text)) {
1886 Py_INCREF(text);
1887 return text;
1888 }
1889 } else {
1890 Py_INCREF(it->root_element);
1891 return (PyObject *)it->root_element;
1892 }
1893 }
1894 }
1895 }
1896
1897 /* See if there are children left to traverse in the current parent. If
1898 * yes, visit the next child. If not, pop the stack and try again.
1899 */
Eli Bendersky113da642012-06-15 07:52:49 +03001900 cur_parent = it->parent_stack->parent;
1901 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001902 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1903 ElementObject *child = (ElementObject *)
1904 cur_parent->extra->children[child_index];
1905 it->parent_stack->child_index++;
1906 it->parent_stack = parent_stack_push_new(it->parent_stack,
1907 child);
1908 if (!it->parent_stack) {
1909 PyErr_NoMemory();
1910 return NULL;
1911 }
1912
1913 if (it->gettext) {
1914 PyObject *text = JOIN_OBJ(child->text);
1915 if (PyObject_IsTrue(text)) {
1916 Py_INCREF(text);
1917 return text;
1918 }
1919 } else if (it->sought_tag == Py_None ||
1920 PyObject_RichCompareBool(child->tag,
1921 it->sought_tag, Py_EQ) == 1) {
1922 Py_INCREF(child);
1923 return (PyObject *)child;
1924 }
1925 else
1926 continue;
1927 }
1928 else {
1929 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1930 ParentLocator *next = it->parent_stack->next;
1931 Py_XDECREF(it->parent_stack->parent);
1932 PyObject_Free(it->parent_stack);
1933 it->parent_stack = next;
1934
1935 /* Note that extra condition on it->parent_stack->parent here;
1936 * this is because itertext() is supposed to only return *inner*
1937 * text, not text following the element it began iteration with.
1938 */
1939 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1940 Py_INCREF(tail);
1941 return tail;
1942 }
1943 }
1944 }
1945
1946 return NULL;
1947}
1948
1949
1950static PyTypeObject ElementIter_Type = {
1951 PyVarObject_HEAD_INIT(NULL, 0)
1952 "_elementtree._element_iterator", /* tp_name */
1953 sizeof(ElementIterObject), /* tp_basicsize */
1954 0, /* tp_itemsize */
1955 /* methods */
1956 (destructor)elementiter_dealloc, /* tp_dealloc */
1957 0, /* tp_print */
1958 0, /* tp_getattr */
1959 0, /* tp_setattr */
1960 0, /* tp_reserved */
1961 0, /* tp_repr */
1962 0, /* tp_as_number */
1963 0, /* tp_as_sequence */
1964 0, /* tp_as_mapping */
1965 0, /* tp_hash */
1966 0, /* tp_call */
1967 0, /* tp_str */
1968 0, /* tp_getattro */
1969 0, /* tp_setattro */
1970 0, /* tp_as_buffer */
1971 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1972 0, /* tp_doc */
1973 (traverseproc)elementiter_traverse, /* tp_traverse */
1974 0, /* tp_clear */
1975 0, /* tp_richcompare */
1976 0, /* tp_weaklistoffset */
1977 PyObject_SelfIter, /* tp_iter */
1978 (iternextfunc)elementiter_next, /* tp_iternext */
1979 0, /* tp_methods */
1980 0, /* tp_members */
1981 0, /* tp_getset */
1982 0, /* tp_base */
1983 0, /* tp_dict */
1984 0, /* tp_descr_get */
1985 0, /* tp_descr_set */
1986 0, /* tp_dictoffset */
1987 0, /* tp_init */
1988 0, /* tp_alloc */
1989 0, /* tp_new */
1990};
1991
1992
1993static PyObject *
1994create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1995{
1996 ElementIterObject *it;
1997 PyObject *star = NULL;
1998
1999 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2000 if (!it)
2001 return NULL;
2002 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
2003 PyObject_GC_Del(it);
2004 return NULL;
2005 }
2006
2007 it->parent_stack->parent = NULL;
2008 it->parent_stack->child_index = 0;
2009 it->parent_stack->next = NULL;
2010
2011 if (PyUnicode_Check(tag))
2012 star = PyUnicode_FromString("*");
2013 else if (PyBytes_Check(tag))
2014 star = PyBytes_FromString("*");
2015
2016 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2017 tag = Py_None;
2018
2019 Py_XDECREF(star);
2020 it->sought_tag = tag;
2021 it->root_done = 0;
2022 it->gettext = gettext;
2023 it->root_element = self;
2024
2025 Py_INCREF(self);
2026 Py_INCREF(tag);
2027
2028 PyObject_GC_Track(it);
2029 return (PyObject *)it;
2030}
2031
2032
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002033/* ==================================================================== */
2034/* the tree builder type */
2035
2036typedef struct {
2037 PyObject_HEAD
2038
Eli Bendersky58d548d2012-05-29 15:45:16 +03002039 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002040
Eli Bendersky58d548d2012-05-29 15:45:16 +03002041 ElementObject *this; /* current node */
2042 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002043
Eli Bendersky58d548d2012-05-29 15:45:16 +03002044 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002045
Eli Bendersky58d548d2012-05-29 15:45:16 +03002046 PyObject *stack; /* element stack */
2047 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002048
Eli Bendersky48d358b2012-05-30 17:57:50 +03002049 PyObject *element_factory;
2050
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002052 PyObject *events; /* list of events, or NULL if not collecting */
2053 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2054 PyObject *end_event_obj;
2055 PyObject *start_ns_event_obj;
2056 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002057} TreeBuilderObject;
2058
Neal Norwitz227b5332006-03-22 09:28:35 +00002059static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002060
Christian Heimes90aa7642007-12-19 02:45:37 +00002061#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002062
2063/* -------------------------------------------------------------------- */
2064/* constructor and destructor */
2065
Eli Bendersky58d548d2012-05-29 15:45:16 +03002066static PyObject *
2067treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002068{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002069 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2070 if (t != NULL) {
2071 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002072
Eli Bendersky58d548d2012-05-29 15:45:16 +03002073 Py_INCREF(Py_None);
2074 t->this = (ElementObject *)Py_None;
2075 Py_INCREF(Py_None);
2076 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002077
Eli Bendersky58d548d2012-05-29 15:45:16 +03002078 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002079 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002080 t->stack = PyList_New(20);
2081 if (!t->stack) {
2082 Py_DECREF(t->this);
2083 Py_DECREF(t->last);
2084 return NULL;
2085 }
2086 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002087
Eli Bendersky58d548d2012-05-29 15:45:16 +03002088 t->events = NULL;
2089 t->start_event_obj = t->end_event_obj = NULL;
2090 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2091 }
2092 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002093}
2094
Eli Bendersky58d548d2012-05-29 15:45:16 +03002095static int
2096treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002098 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002099 PyObject *element_factory = NULL;
2100 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
2101
2102 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2103 &element_factory)) {
2104 return -1;
2105 }
2106
2107 if (element_factory) {
2108 Py_INCREF(element_factory);
2109 Py_XDECREF(self_tb->element_factory);
2110 self_tb->element_factory = element_factory;
2111 }
2112
Eli Bendersky58d548d2012-05-29 15:45:16 +03002113 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002114}
2115
Eli Bendersky48d358b2012-05-30 17:57:50 +03002116static int
2117treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2118{
2119 Py_VISIT(self->root);
2120 Py_VISIT(self->this);
2121 Py_VISIT(self->last);
2122 Py_VISIT(self->data);
2123 Py_VISIT(self->stack);
2124 Py_VISIT(self->element_factory);
2125 return 0;
2126}
2127
2128static int
2129treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002130{
2131 Py_XDECREF(self->end_ns_event_obj);
2132 Py_XDECREF(self->start_ns_event_obj);
2133 Py_XDECREF(self->end_event_obj);
2134 Py_XDECREF(self->start_event_obj);
2135 Py_XDECREF(self->events);
2136 Py_DECREF(self->stack);
2137 Py_XDECREF(self->data);
2138 Py_DECREF(self->last);
2139 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002140 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002141 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002142 return 0;
2143}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002144
Eli Bendersky48d358b2012-05-30 17:57:50 +03002145static void
2146treebuilder_dealloc(TreeBuilderObject *self)
2147{
2148 PyObject_GC_UnTrack(self);
2149 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002150 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002151}
2152
2153/* -------------------------------------------------------------------- */
2154/* handlers */
2155
2156LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002157treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2158 PyObject* attrib)
2159{
2160 PyObject* node;
2161 PyObject* this;
2162
2163 if (self->data) {
2164 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002165 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002166 self->last->text = JOIN_SET(
2167 self->data, PyList_CheckExact(self->data)
2168 );
2169 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002170 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002171 self->last->tail = JOIN_SET(
2172 self->data, PyList_CheckExact(self->data)
2173 );
2174 }
2175 self->data = NULL;
2176 }
2177
Eli Bendersky48d358b2012-05-30 17:57:50 +03002178 if (self->element_factory) {
2179 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2180 } else {
2181 node = create_new_element(tag, attrib);
2182 }
2183 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002184 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002185 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002186
2187 this = (PyObject*) self->this;
2188
2189 if (this != Py_None) {
2190 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002191 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002192 } else {
2193 if (self->root) {
2194 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002195 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002196 "multiple elements on top level"
2197 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002198 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002199 }
2200 Py_INCREF(node);
2201 self->root = node;
2202 }
2203
2204 if (self->index < PyList_GET_SIZE(self->stack)) {
2205 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002206 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002207 Py_INCREF(this);
2208 } else {
2209 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002210 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002211 }
2212 self->index++;
2213
2214 Py_DECREF(this);
2215 Py_INCREF(node);
2216 self->this = (ElementObject*) node;
2217
2218 Py_DECREF(self->last);
2219 Py_INCREF(node);
2220 self->last = (ElementObject*) node;
2221
2222 if (self->start_event_obj) {
2223 PyObject* res;
2224 PyObject* action = self->start_event_obj;
2225 res = PyTuple_New(2);
2226 if (res) {
2227 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2228 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2229 PyList_Append(self->events, res);
2230 Py_DECREF(res);
2231 } else
2232 PyErr_Clear(); /* FIXME: propagate error */
2233 }
2234
2235 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002236
2237 error:
2238 Py_DECREF(node);
2239 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240}
2241
2242LOCAL(PyObject*)
2243treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2244{
2245 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002246 if (self->last == (ElementObject*) Py_None) {
2247 /* ignore calls to data before the first call to start */
2248 Py_RETURN_NONE;
2249 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002250 /* store the first item as is */
2251 Py_INCREF(data); self->data = data;
2252 } else {
2253 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002254 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2255 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002256 /* expat often generates single character data sections; handle
2257 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002258 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2259 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002260 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002261 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262 } else if (PyList_CheckExact(self->data)) {
2263 if (PyList_Append(self->data, data) < 0)
2264 return NULL;
2265 } else {
2266 PyObject* list = PyList_New(2);
2267 if (!list)
2268 return NULL;
2269 PyList_SET_ITEM(list, 0, self->data);
2270 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2271 self->data = list;
2272 }
2273 }
2274
2275 Py_RETURN_NONE;
2276}
2277
2278LOCAL(PyObject*)
2279treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2280{
2281 PyObject* item;
2282
2283 if (self->data) {
2284 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002285 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286 self->last->text = JOIN_SET(
2287 self->data, PyList_CheckExact(self->data)
2288 );
2289 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002290 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291 self->last->tail = JOIN_SET(
2292 self->data, PyList_CheckExact(self->data)
2293 );
2294 }
2295 self->data = NULL;
2296 }
2297
2298 if (self->index == 0) {
2299 PyErr_SetString(
2300 PyExc_IndexError,
2301 "pop from empty stack"
2302 );
2303 return NULL;
2304 }
2305
2306 self->index--;
2307
2308 item = PyList_GET_ITEM(self->stack, self->index);
2309 Py_INCREF(item);
2310
2311 Py_DECREF(self->last);
2312
2313 self->last = (ElementObject*) self->this;
2314 self->this = (ElementObject*) item;
2315
2316 if (self->end_event_obj) {
2317 PyObject* res;
2318 PyObject* action = self->end_event_obj;
2319 PyObject* node = (PyObject*) self->last;
2320 res = PyTuple_New(2);
2321 if (res) {
2322 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2323 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2324 PyList_Append(self->events, res);
2325 Py_DECREF(res);
2326 } else
2327 PyErr_Clear(); /* FIXME: propagate error */
2328 }
2329
2330 Py_INCREF(self->last);
2331 return (PyObject*) self->last;
2332}
2333
2334LOCAL(void)
2335treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002336 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337{
2338 PyObject* res;
2339 PyObject* action;
2340 PyObject* parcel;
2341
2342 if (!self->events)
2343 return;
2344
2345 if (start) {
2346 if (!self->start_ns_event_obj)
2347 return;
2348 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002349 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350 if (!parcel)
2351 return;
2352 Py_INCREF(action);
2353 } else {
2354 if (!self->end_ns_event_obj)
2355 return;
2356 action = self->end_ns_event_obj;
2357 Py_INCREF(action);
2358 parcel = Py_None;
2359 Py_INCREF(parcel);
2360 }
2361
2362 res = PyTuple_New(2);
2363
2364 if (res) {
2365 PyTuple_SET_ITEM(res, 0, action);
2366 PyTuple_SET_ITEM(res, 1, parcel);
2367 PyList_Append(self->events, res);
2368 Py_DECREF(res);
2369 } else
2370 PyErr_Clear(); /* FIXME: propagate error */
2371}
2372
2373/* -------------------------------------------------------------------- */
2374/* methods (in alphabetical order) */
2375
2376static PyObject*
2377treebuilder_data(TreeBuilderObject* self, PyObject* args)
2378{
2379 PyObject* data;
2380 if (!PyArg_ParseTuple(args, "O:data", &data))
2381 return NULL;
2382
2383 return treebuilder_handle_data(self, data);
2384}
2385
2386static PyObject*
2387treebuilder_end(TreeBuilderObject* self, PyObject* args)
2388{
2389 PyObject* tag;
2390 if (!PyArg_ParseTuple(args, "O:end", &tag))
2391 return NULL;
2392
2393 return treebuilder_handle_end(self, tag);
2394}
2395
2396LOCAL(PyObject*)
2397treebuilder_done(TreeBuilderObject* self)
2398{
2399 PyObject* res;
2400
2401 /* FIXME: check stack size? */
2402
2403 if (self->root)
2404 res = self->root;
2405 else
2406 res = Py_None;
2407
2408 Py_INCREF(res);
2409 return res;
2410}
2411
2412static PyObject*
2413treebuilder_close(TreeBuilderObject* self, PyObject* args)
2414{
2415 if (!PyArg_ParseTuple(args, ":close"))
2416 return NULL;
2417
2418 return treebuilder_done(self);
2419}
2420
2421static PyObject*
2422treebuilder_start(TreeBuilderObject* self, PyObject* args)
2423{
2424 PyObject* tag;
2425 PyObject* attrib = Py_None;
2426 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2427 return NULL;
2428
2429 return treebuilder_handle_start(self, tag, attrib);
2430}
2431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432static PyMethodDef treebuilder_methods[] = {
2433 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2434 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2435 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2437 {NULL, NULL}
2438};
2439
Neal Norwitz227b5332006-03-22 09:28:35 +00002440static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002441 PyVarObject_HEAD_INIT(NULL, 0)
2442 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002443 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002444 (destructor)treebuilder_dealloc, /* tp_dealloc */
2445 0, /* tp_print */
2446 0, /* tp_getattr */
2447 0, /* tp_setattr */
2448 0, /* tp_reserved */
2449 0, /* tp_repr */
2450 0, /* tp_as_number */
2451 0, /* tp_as_sequence */
2452 0, /* tp_as_mapping */
2453 0, /* tp_hash */
2454 0, /* tp_call */
2455 0, /* tp_str */
2456 0, /* tp_getattro */
2457 0, /* tp_setattro */
2458 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002459 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2460 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002461 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002462 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2463 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002464 0, /* tp_richcompare */
2465 0, /* tp_weaklistoffset */
2466 0, /* tp_iter */
2467 0, /* tp_iternext */
2468 treebuilder_methods, /* tp_methods */
2469 0, /* tp_members */
2470 0, /* tp_getset */
2471 0, /* tp_base */
2472 0, /* tp_dict */
2473 0, /* tp_descr_get */
2474 0, /* tp_descr_set */
2475 0, /* tp_dictoffset */
2476 (initproc)treebuilder_init, /* tp_init */
2477 PyType_GenericAlloc, /* tp_alloc */
2478 treebuilder_new, /* tp_new */
2479 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480};
2481
2482/* ==================================================================== */
2483/* the expat interface */
2484
2485#if defined(USE_EXPAT)
2486
2487#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002489static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491
Eli Bendersky52467b12012-06-01 07:13:08 +03002492static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2493 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2494
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495typedef struct {
2496 PyObject_HEAD
2497
2498 XML_Parser parser;
2499
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002500 PyObject *target;
2501 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002503 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002505 PyObject *handle_start;
2506 PyObject *handle_data;
2507 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002509 PyObject *handle_comment;
2510 PyObject *handle_pi;
2511 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002513 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002514
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515} XMLParserObject;
2516
Neal Norwitz227b5332006-03-22 09:28:35 +00002517static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002519#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2520
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521/* helpers */
2522
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523LOCAL(PyObject*)
2524makeuniversal(XMLParserObject* self, const char* string)
2525{
2526 /* convert a UTF-8 tag/attribute name from the expat parser
2527 to a universal name string */
2528
2529 int size = strlen(string);
2530 PyObject* key;
2531 PyObject* value;
2532
2533 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002534 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 if (!key)
2536 return NULL;
2537
2538 value = PyDict_GetItem(self->names, key);
2539
2540 if (value) {
2541 Py_INCREF(value);
2542 } else {
2543 /* new name. convert to universal name, and decode as
2544 necessary */
2545
2546 PyObject* tag;
2547 char* p;
2548 int i;
2549
2550 /* look for namespace separator */
2551 for (i = 0; i < size; i++)
2552 if (string[i] == '}')
2553 break;
2554 if (i != size) {
2555 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002556 tag = PyBytes_FromStringAndSize(NULL, size+1);
2557 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 p[0] = '{';
2559 memcpy(p+1, string, size);
2560 size++;
2561 } else {
2562 /* plain name; use key as tag */
2563 Py_INCREF(key);
2564 tag = key;
2565 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002566
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002568 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002569 value = PyUnicode_DecodeUTF8(p, size, "strict");
2570 Py_DECREF(tag);
2571 if (!value) {
2572 Py_DECREF(key);
2573 return NULL;
2574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575
2576 /* add to names dictionary */
2577 if (PyDict_SetItem(self->names, key, value) < 0) {
2578 Py_DECREF(key);
2579 Py_DECREF(value);
2580 return NULL;
2581 }
2582 }
2583
2584 Py_DECREF(key);
2585 return value;
2586}
2587
Eli Bendersky5b77d812012-03-16 08:20:05 +02002588/* Set the ParseError exception with the given parameters.
2589 * If message is not NULL, it's used as the error string. Otherwise, the
2590 * message string is the default for the given error_code.
2591*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002592static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002593expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002594{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002595 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002596
Victor Stinner499dfcf2011-03-21 13:26:24 +01002597 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002598 message ? message : EXPAT(ErrorString)(error_code),
2599 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002600 if (errmsg == NULL)
2601 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002602
Victor Stinner499dfcf2011-03-21 13:26:24 +01002603 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2604 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002605 if (!error)
2606 return;
2607
Eli Bendersky5b77d812012-03-16 08:20:05 +02002608 /* Add code and position attributes */
2609 code = PyLong_FromLong((long)error_code);
2610 if (!code) {
2611 Py_DECREF(error);
2612 return;
2613 }
2614 if (PyObject_SetAttrString(error, "code", code) == -1) {
2615 Py_DECREF(error);
2616 Py_DECREF(code);
2617 return;
2618 }
2619 Py_DECREF(code);
2620
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002621 position = Py_BuildValue("(ii)", line, column);
2622 if (!position) {
2623 Py_DECREF(error);
2624 return;
2625 }
2626 if (PyObject_SetAttrString(error, "position", position) == -1) {
2627 Py_DECREF(error);
2628 Py_DECREF(position);
2629 return;
2630 }
2631 Py_DECREF(position);
2632
2633 PyErr_SetObject(elementtree_parseerror_obj, error);
2634 Py_DECREF(error);
2635}
2636
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637/* -------------------------------------------------------------------- */
2638/* handlers */
2639
2640static void
2641expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2642 int data_len)
2643{
2644 PyObject* key;
2645 PyObject* value;
2646 PyObject* res;
2647
2648 if (data_len < 2 || data_in[0] != '&')
2649 return;
2650
Neal Norwitz0269b912007-08-08 06:56:02 +00002651 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652 if (!key)
2653 return;
2654
2655 value = PyDict_GetItem(self->entity, key);
2656
2657 if (value) {
2658 if (TreeBuilder_CheckExact(self->target))
2659 res = treebuilder_handle_data(
2660 (TreeBuilderObject*) self->target, value
2661 );
2662 else if (self->handle_data)
2663 res = PyObject_CallFunction(self->handle_data, "O", value);
2664 else
2665 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002667 } else if (!PyErr_Occurred()) {
2668 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002669 char message[128] = "undefined entity ";
2670 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002671 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002672 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002674 EXPAT(GetErrorColumnNumber)(self->parser),
2675 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676 );
2677 }
2678
2679 Py_DECREF(key);
2680}
2681
2682static void
2683expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2684 const XML_Char **attrib_in)
2685{
2686 PyObject* res;
2687 PyObject* tag;
2688 PyObject* attrib;
2689 int ok;
2690
2691 /* tag name */
2692 tag = makeuniversal(self, tag_in);
2693 if (!tag)
2694 return; /* parser will look for errors */
2695
2696 /* attributes */
2697 if (attrib_in[0]) {
2698 attrib = PyDict_New();
2699 if (!attrib)
2700 return;
2701 while (attrib_in[0] && attrib_in[1]) {
2702 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002703 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704 if (!key || !value) {
2705 Py_XDECREF(value);
2706 Py_XDECREF(key);
2707 Py_DECREF(attrib);
2708 return;
2709 }
2710 ok = PyDict_SetItem(attrib, key, value);
2711 Py_DECREF(value);
2712 Py_DECREF(key);
2713 if (ok < 0) {
2714 Py_DECREF(attrib);
2715 return;
2716 }
2717 attrib_in += 2;
2718 }
2719 } else {
2720 Py_INCREF(Py_None);
2721 attrib = Py_None;
2722 }
2723
Eli Bendersky48d358b2012-05-30 17:57:50 +03002724 /* If we get None, pass an empty dictionary on */
2725 if (attrib == Py_None) {
2726 Py_DECREF(attrib);
2727 attrib = PyDict_New();
2728 if (!attrib)
2729 return;
2730 }
2731
2732 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733 /* shortcut */
2734 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2735 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002736 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002737 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002739 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740 res = NULL;
2741
2742 Py_DECREF(tag);
2743 Py_DECREF(attrib);
2744
2745 Py_XDECREF(res);
2746}
2747
2748static void
2749expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2750 int data_len)
2751{
2752 PyObject* data;
2753 PyObject* res;
2754
Neal Norwitz0269b912007-08-08 06:56:02 +00002755 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002756 if (!data)
2757 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
2759 if (TreeBuilder_CheckExact(self->target))
2760 /* shortcut */
2761 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2762 else if (self->handle_data)
2763 res = PyObject_CallFunction(self->handle_data, "O", data);
2764 else
2765 res = NULL;
2766
2767 Py_DECREF(data);
2768
2769 Py_XDECREF(res);
2770}
2771
2772static void
2773expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2774{
2775 PyObject* tag;
2776 PyObject* res = NULL;
2777
2778 if (TreeBuilder_CheckExact(self->target))
2779 /* shortcut */
2780 /* the standard tree builder doesn't look at the end tag */
2781 res = treebuilder_handle_end(
2782 (TreeBuilderObject*) self->target, Py_None
2783 );
2784 else if (self->handle_end) {
2785 tag = makeuniversal(self, tag_in);
2786 if (tag) {
2787 res = PyObject_CallFunction(self->handle_end, "O", tag);
2788 Py_DECREF(tag);
2789 }
2790 }
2791
2792 Py_XDECREF(res);
2793}
2794
2795static void
2796expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2797 const XML_Char *uri)
2798{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002799 PyObject* sprefix = NULL;
2800 PyObject* suri = NULL;
2801
2802 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2803 if (!suri)
2804 return;
2805
2806 if (prefix)
2807 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2808 else
2809 sprefix = PyUnicode_FromString("");
2810 if (!sprefix) {
2811 Py_DECREF(suri);
2812 return;
2813 }
2814
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002816 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002818
2819 Py_DECREF(sprefix);
2820 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821}
2822
2823static void
2824expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2825{
2826 treebuilder_handle_namespace(
2827 (TreeBuilderObject*) self->target, 0, NULL, NULL
2828 );
2829}
2830
2831static void
2832expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2833{
2834 PyObject* comment;
2835 PyObject* res;
2836
2837 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002838 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002839 if (comment) {
2840 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2841 Py_XDECREF(res);
2842 Py_DECREF(comment);
2843 }
2844 }
2845}
2846
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002847static void
2848expat_start_doctype_handler(XMLParserObject *self,
2849 const XML_Char *doctype_name,
2850 const XML_Char *sysid,
2851 const XML_Char *pubid,
2852 int has_internal_subset)
2853{
2854 PyObject *self_pyobj = (PyObject *)self;
2855 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2856 PyObject *parser_doctype = NULL;
2857 PyObject *res = NULL;
2858
2859 doctype_name_obj = makeuniversal(self, doctype_name);
2860 if (!doctype_name_obj)
2861 return;
2862
2863 if (sysid) {
2864 sysid_obj = makeuniversal(self, sysid);
2865 if (!sysid_obj) {
2866 Py_DECREF(doctype_name_obj);
2867 return;
2868 }
2869 } else {
2870 Py_INCREF(Py_None);
2871 sysid_obj = Py_None;
2872 }
2873
2874 if (pubid) {
2875 pubid_obj = makeuniversal(self, pubid);
2876 if (!pubid_obj) {
2877 Py_DECREF(doctype_name_obj);
2878 Py_DECREF(sysid_obj);
2879 return;
2880 }
2881 } else {
2882 Py_INCREF(Py_None);
2883 pubid_obj = Py_None;
2884 }
2885
2886 /* If the target has a handler for doctype, call it. */
2887 if (self->handle_doctype) {
2888 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2889 doctype_name_obj, pubid_obj, sysid_obj);
2890 Py_CLEAR(res);
2891 }
2892
2893 /* Now see if the parser itself has a doctype method. If yes and it's
2894 * a subclass, call it but warn about deprecation. If it's not a subclass
2895 * (i.e. vanilla XMLParser), do nothing.
2896 */
2897 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2898 if (parser_doctype) {
2899 if (!XMLParser_CheckExact(self_pyobj)) {
2900 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2901 "This method of XMLParser is deprecated. Define"
2902 " doctype() method on the TreeBuilder target.",
2903 1) < 0) {
2904 goto clear;
2905 }
2906 res = PyObject_CallFunction(parser_doctype, "OOO",
2907 doctype_name_obj, pubid_obj, sysid_obj);
2908 Py_CLEAR(res);
2909 }
2910 }
2911
2912clear:
2913 Py_XDECREF(parser_doctype);
2914 Py_DECREF(doctype_name_obj);
2915 Py_DECREF(pubid_obj);
2916 Py_DECREF(sysid_obj);
2917}
2918
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919static void
2920expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2921 const XML_Char* data_in)
2922{
2923 PyObject* target;
2924 PyObject* data;
2925 PyObject* res;
2926
2927 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002928 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2929 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 if (target && data) {
2931 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2932 Py_XDECREF(res);
2933 Py_DECREF(data);
2934 Py_DECREF(target);
2935 } else {
2936 Py_XDECREF(data);
2937 Py_XDECREF(target);
2938 }
2939 }
2940}
2941
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942static int
2943expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2944 XML_Encoding *info)
2945{
2946 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 unsigned char s[256];
2948 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002949 void *data;
2950 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951
2952 memset(info, 0, sizeof(XML_Encoding));
2953
2954 for (i = 0; i < 256; i++)
2955 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002956
Fredrik Lundhc3389992005-12-25 11:40:19 +00002957 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 if (!u)
2959 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002960 if (PyUnicode_READY(u))
2961 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002963 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 Py_DECREF(u);
2965 return XML_STATUS_ERROR;
2966 }
2967
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002968 kind = PyUnicode_KIND(u);
2969 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002971 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2972 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2973 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 }
2977
2978 Py_DECREF(u);
2979
2980 return XML_STATUS_OK;
2981}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982
2983/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984
Eli Bendersky52467b12012-06-01 07:13:08 +03002985static PyObject *
2986xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987{
Eli Bendersky52467b12012-06-01 07:13:08 +03002988 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2989 if (self) {
2990 self->parser = NULL;
2991 self->target = self->entity = self->names = NULL;
2992 self->handle_start = self->handle_data = self->handle_end = NULL;
2993 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002994 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002996 return (PyObject *)self;
2997}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998
Eli Bendersky52467b12012-06-01 07:13:08 +03002999static int
3000xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3001{
3002 XMLParserObject *self_xp = (XMLParserObject *)self;
3003 PyObject *target = NULL, *html = NULL;
3004 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003005 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
Eli Bendersky52467b12012-06-01 07:13:08 +03003007 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3008 &html, &target, &encoding)) {
3009 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003011
Eli Bendersky52467b12012-06-01 07:13:08 +03003012 self_xp->entity = PyDict_New();
3013 if (!self_xp->entity)
3014 return -1;
3015
3016 self_xp->names = PyDict_New();
3017 if (!self_xp->names) {
3018 Py_XDECREF(self_xp->entity);
3019 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020 }
3021
Eli Bendersky52467b12012-06-01 07:13:08 +03003022 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3023 if (!self_xp->parser) {
3024 Py_XDECREF(self_xp->entity);
3025 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003027 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 }
3029
Eli Bendersky52467b12012-06-01 07:13:08 +03003030 if (target) {
3031 Py_INCREF(target);
3032 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003033 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03003035 Py_XDECREF(self_xp->entity);
3036 Py_XDECREF(self_xp->names);
3037 EXPAT(ParserFree)(self_xp->parser);
3038 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003040 }
3041 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042
Eli Bendersky52467b12012-06-01 07:13:08 +03003043 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3044 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3045 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3046 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3047 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3048 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003049 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050
3051 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003054 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003056 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057 (XML_StartElementHandler) expat_start_handler,
3058 (XML_EndElementHandler) expat_end_handler
3059 );
3060 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003061 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 (XML_DefaultHandler) expat_default_handler
3063 );
3064 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003065 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066 (XML_CharacterDataHandler) expat_data_handler
3067 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003068 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003070 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071 (XML_CommentHandler) expat_comment_handler
3072 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003073 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003075 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 (XML_ProcessingInstructionHandler) expat_pi_handler
3077 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003078 EXPAT(SetStartDoctypeDeclHandler)(
3079 self_xp->parser,
3080 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3081 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003083 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3085 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086
Eli Bendersky52467b12012-06-01 07:13:08 +03003087 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003088}
3089
Eli Bendersky52467b12012-06-01 07:13:08 +03003090static int
3091xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3092{
3093 Py_VISIT(self->handle_close);
3094 Py_VISIT(self->handle_pi);
3095 Py_VISIT(self->handle_comment);
3096 Py_VISIT(self->handle_end);
3097 Py_VISIT(self->handle_data);
3098 Py_VISIT(self->handle_start);
3099
3100 Py_VISIT(self->target);
3101 Py_VISIT(self->entity);
3102 Py_VISIT(self->names);
3103
3104 return 0;
3105}
3106
3107static int
3108xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109{
3110 EXPAT(ParserFree)(self->parser);
3111
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003112 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113 Py_XDECREF(self->handle_pi);
3114 Py_XDECREF(self->handle_comment);
3115 Py_XDECREF(self->handle_end);
3116 Py_XDECREF(self->handle_data);
3117 Py_XDECREF(self->handle_start);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003118 Py_XDECREF(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119
Eli Bendersky52467b12012-06-01 07:13:08 +03003120 Py_XDECREF(self->target);
3121 Py_XDECREF(self->entity);
3122 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123
Eli Bendersky52467b12012-06-01 07:13:08 +03003124 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125}
3126
Eli Bendersky52467b12012-06-01 07:13:08 +03003127static void
3128xmlparser_dealloc(XMLParserObject* self)
3129{
3130 PyObject_GC_UnTrack(self);
3131 xmlparser_gc_clear(self);
3132 Py_TYPE(self)->tp_free((PyObject *)self);
3133}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134
3135LOCAL(PyObject*)
3136expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3137{
3138 int ok;
3139
3140 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3141
3142 if (PyErr_Occurred())
3143 return NULL;
3144
3145 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003146 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003147 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003149 EXPAT(GetErrorColumnNumber)(self->parser),
3150 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151 );
3152 return NULL;
3153 }
3154
3155 Py_RETURN_NONE;
3156}
3157
3158static PyObject*
3159xmlparser_close(XMLParserObject* self, PyObject* args)
3160{
3161 /* end feeding data to parser */
3162
3163 PyObject* res;
3164 if (!PyArg_ParseTuple(args, ":close"))
3165 return NULL;
3166
3167 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003168 if (!res)
3169 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003171 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172 Py_DECREF(res);
3173 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003174 } if (self->handle_close) {
3175 Py_DECREF(res);
3176 return PyObject_CallFunction(self->handle_close, "");
3177 } else
3178 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179}
3180
3181static PyObject*
3182xmlparser_feed(XMLParserObject* self, PyObject* args)
3183{
3184 /* feed data to parser */
3185
3186 char* data;
3187 int data_len;
3188 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3189 return NULL;
3190
3191 return expat_parse(self, data, data_len, 0);
3192}
3193
3194static PyObject*
3195xmlparser_parse(XMLParserObject* self, PyObject* args)
3196{
3197 /* (internal) parse until end of input stream */
3198
3199 PyObject* reader;
3200 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003201 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 PyObject* res;
3203
3204 PyObject* fileobj;
3205 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3206 return NULL;
3207
3208 reader = PyObject_GetAttrString(fileobj, "read");
3209 if (!reader)
3210 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003211
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 /* read from open file object */
3213 for (;;) {
3214
3215 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3216
3217 if (!buffer) {
3218 /* read failed (e.g. due to KeyboardInterrupt) */
3219 Py_DECREF(reader);
3220 return NULL;
3221 }
3222
Eli Benderskyf996e772012-03-16 05:53:30 +02003223 if (PyUnicode_CheckExact(buffer)) {
3224 /* A unicode object is encoded into bytes using UTF-8 */
3225 if (PyUnicode_GET_SIZE(buffer) == 0) {
3226 Py_DECREF(buffer);
3227 break;
3228 }
3229 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3230 if (!temp) {
3231 /* Propagate exception from PyUnicode_AsEncodedString */
3232 Py_DECREF(buffer);
3233 Py_DECREF(reader);
3234 return NULL;
3235 }
3236
3237 /* Here we no longer need the original buffer since it contains
3238 * unicode. Make it point to the encoded bytes object.
3239 */
3240 Py_DECREF(buffer);
3241 buffer = temp;
3242 }
3243 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 Py_DECREF(buffer);
3245 break;
3246 }
3247
3248 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003249 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 );
3251
3252 Py_DECREF(buffer);
3253
3254 if (!res) {
3255 Py_DECREF(reader);
3256 return NULL;
3257 }
3258 Py_DECREF(res);
3259
3260 }
3261
3262 Py_DECREF(reader);
3263
3264 res = expat_parse(self, "", 0, 1);
3265
3266 if (res && TreeBuilder_CheckExact(self->target)) {
3267 Py_DECREF(res);
3268 return treebuilder_done((TreeBuilderObject*) self->target);
3269 }
3270
3271 return res;
3272}
3273
3274static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003275xmlparser_doctype(XMLParserObject *self, PyObject *args)
3276{
3277 Py_RETURN_NONE;
3278}
3279
3280static PyObject*
3281xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282{
3283 /* activate element event reporting */
3284
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003285 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 TreeBuilderObject* target;
3287
3288 PyObject* events; /* event collector */
3289 PyObject* event_set = Py_None;
3290 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3291 &event_set))
3292 return NULL;
3293
3294 if (!TreeBuilder_CheckExact(self->target)) {
3295 PyErr_SetString(
3296 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003297 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 "targets"
3299 );
3300 return NULL;
3301 }
3302
3303 target = (TreeBuilderObject*) self->target;
3304
3305 Py_INCREF(events);
3306 Py_XDECREF(target->events);
3307 target->events = events;
3308
3309 /* clear out existing events */
3310 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
3311 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
3312 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
3313 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
3314
3315 if (event_set == Py_None) {
3316 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003317 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318 Py_RETURN_NONE;
3319 }
3320
3321 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3322 goto error;
3323
3324 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3325 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3326 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003327 if (PyUnicode_Check(item)) {
3328 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003329 if (event == NULL)
3330 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003331 } else if (PyBytes_Check(item))
3332 event = PyBytes_AS_STRING(item);
3333 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003335 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 if (strcmp(event, "start") == 0) {
3337 Py_INCREF(item);
3338 target->start_event_obj = item;
3339 } else if (strcmp(event, "end") == 0) {
3340 Py_INCREF(item);
3341 Py_XDECREF(target->end_event_obj);
3342 target->end_event_obj = item;
3343 } else if (strcmp(event, "start-ns") == 0) {
3344 Py_INCREF(item);
3345 Py_XDECREF(target->start_ns_event_obj);
3346 target->start_ns_event_obj = item;
3347 EXPAT(SetNamespaceDeclHandler)(
3348 self->parser,
3349 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3350 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3351 );
3352 } else if (strcmp(event, "end-ns") == 0) {
3353 Py_INCREF(item);
3354 Py_XDECREF(target->end_ns_event_obj);
3355 target->end_ns_event_obj = item;
3356 EXPAT(SetNamespaceDeclHandler)(
3357 self->parser,
3358 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3359 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3360 );
3361 } else {
3362 PyErr_Format(
3363 PyExc_ValueError,
3364 "unknown event '%s'", event
3365 );
3366 return NULL;
3367 }
3368 }
3369
3370 Py_RETURN_NONE;
3371
3372 error:
3373 PyErr_SetString(
3374 PyExc_TypeError,
3375 "invalid event tuple"
3376 );
3377 return NULL;
3378}
3379
3380static PyMethodDef xmlparser_methods[] = {
3381 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3382 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3383 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3384 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003385 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 {NULL, NULL}
3387};
3388
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003389static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003390xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003392 if (PyUnicode_Check(nameobj)) {
3393 PyObject* res;
3394 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3395 res = self->entity;
3396 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3397 res = self->target;
3398 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3399 return PyUnicode_FromFormat(
3400 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003401 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003402 }
3403 else
3404 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405
Alexander Belopolskye239d232010-12-08 23:31:48 +00003406 Py_INCREF(res);
3407 return res;
3408 }
3409 generic:
3410 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411}
3412
Neal Norwitz227b5332006-03-22 09:28:35 +00003413static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003414 PyVarObject_HEAD_INIT(NULL, 0)
3415 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003417 (destructor)xmlparser_dealloc, /* tp_dealloc */
3418 0, /* tp_print */
3419 0, /* tp_getattr */
3420 0, /* tp_setattr */
3421 0, /* tp_reserved */
3422 0, /* tp_repr */
3423 0, /* tp_as_number */
3424 0, /* tp_as_sequence */
3425 0, /* tp_as_mapping */
3426 0, /* tp_hash */
3427 0, /* tp_call */
3428 0, /* tp_str */
3429 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3430 0, /* tp_setattro */
3431 0, /* tp_as_buffer */
3432 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3433 /* tp_flags */
3434 0, /* tp_doc */
3435 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3436 (inquiry)xmlparser_gc_clear, /* tp_clear */
3437 0, /* tp_richcompare */
3438 0, /* tp_weaklistoffset */
3439 0, /* tp_iter */
3440 0, /* tp_iternext */
3441 xmlparser_methods, /* tp_methods */
3442 0, /* tp_members */
3443 0, /* tp_getset */
3444 0, /* tp_base */
3445 0, /* tp_dict */
3446 0, /* tp_descr_get */
3447 0, /* tp_descr_set */
3448 0, /* tp_dictoffset */
3449 (initproc)xmlparser_init, /* tp_init */
3450 PyType_GenericAlloc, /* tp_alloc */
3451 xmlparser_new, /* tp_new */
3452 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003453};
3454
3455#endif
3456
3457/* ==================================================================== */
3458/* python module interface */
3459
3460static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462 {NULL, NULL}
3463};
3464
Martin v. Löwis1a214512008-06-11 05:26:20 +00003465
/* Module definition passed to PyModule_Create() in PyInit__elementtree. */
static struct PyModuleDef _elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree",     /* module name */
    NULL,               /* no module docstring */
    -1,                 /* module state size */
    _functions,         /* module-level functions */
    /* the remaining slots are unused */
    NULL,
    NULL,
    NULL,
    NULL
};
3477
Neal Norwitzf6657e62006-12-28 04:47:50 +00003478PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003479PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003481 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003483 /* Initialize object types */
3484 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003485 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003486 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003487 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003489 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003490 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491#endif
3492
Martin v. Löwis1a214512008-06-11 05:26:20 +00003493 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003494 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003495 return NULL;
3496
Eli Bendersky828efde2012-04-05 05:40:58 +03003497 if (!(temp = PyImport_ImportModule("copy")))
3498 return NULL;
3499 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3500 Py_XDECREF(temp);
3501
3502 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3503 return NULL;
3504
Eli Bendersky20d41742012-06-01 09:48:37 +03003505 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003506 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3507 if (expat_capi) {
3508 /* check that it's usable */
3509 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3510 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3511 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3512 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003513 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003514 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003515 }
3516 }
3517 if (!expat_capi) {
3518 PyErr_SetString(
3519 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3520 );
3521 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003522 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003524 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003525 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003526 );
3527 Py_INCREF(elementtree_parseerror_obj);
3528 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3529
Eli Bendersky092af1f2012-03-04 07:14:03 +02003530 Py_INCREF((PyObject *)&Element_Type);
3531 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3532
Eli Bendersky58d548d2012-05-29 15:45:16 +03003533 Py_INCREF((PyObject *)&TreeBuilder_Type);
3534 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3535
Eli Bendersky52467b12012-06-01 07:13:08 +03003536#if defined(USE_EXPAT)
3537 Py_INCREF((PyObject *)&XMLParser_Type);
3538 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3539#endif
3540
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003541 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542}