blob: f0b5a3fa242dc49733f26e3c290e9045b9b28f11 [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* Allocation tracing hooks.  The tracing variant (a running byte count
   printed on every ALLOC/RELEASE) is compiled out; change "#if 0" to
   "#if 1" to enable it.  In the default build both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
87
/* compiler tweaks: LOCAL(type) introduces a file-private helper returning
   `type`; under MSVC the helper is additionally marked __inline and
   __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
94
/* Tagged-pointer helpers.  The lowest bit of a text/tail pointer carries
   a 'join' flag, so every use of text or tail as an object pointer must
   go through JOIN_OBJ.  See the comments in the ElementObject definition
   for more info.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_SET(p, flag)  -> p with its flag bit replaced by `flag`
   JOIN_OBJ(p)        -> the plain PyObject* with the flag bit cleared */
#define JOIN_GET(p) (((Py_uintptr_t) (p)) & (Py_uintptr_t) 1)
#define JOIN_SET(p, flag) ((void*) (((Py_uintptr_t) (JOIN_OBJ(p))) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) (((Py_uintptr_t) (p)) & ~(Py_uintptr_t) 1))
102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
126 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
131 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
132
133 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
134
135 Py_DECREF(args);
136
137 return result;
138}
139
140LOCAL(PyObject*)
141list_join(PyObject* list)
142{
143 /* join list elements (destroying the list in the process) */
144
145 PyObject* joiner;
146 PyObject* function;
147 PyObject* args;
148 PyObject* result;
149
150 switch (PyList_GET_SIZE(list)) {
151 case 0:
152 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000153 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 case 1:
155 result = PyList_GET_ITEM(list, 0);
156 Py_INCREF(result);
157 Py_DECREF(list);
158 return result;
159 }
160
161 /* two or more elements: slice out a suitable separator from the
162 first member, and use that to join the entire list */
163
164 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
165 if (!joiner)
166 return NULL;
167
168 function = PyObject_GetAttrString(joiner, "join");
169 if (!function) {
170 Py_DECREF(joiner);
171 return NULL;
172 }
173
174 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000175 if (!args)
176 return NULL;
177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 PyTuple_SET_ITEM(args, 0, list);
179
180 result = PyObject_CallObject(function, args);
181
182 Py_DECREF(args); /* also removes list */
183 Py_DECREF(function);
184 Py_DECREF(joiner);
185
186 return result;
187}
188
Eli Bendersky48d358b2012-05-30 17:57:50 +0300189/* Is the given object an empty dictionary?
190*/
191static int
192is_empty_dict(PyObject *obj)
193{
194 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
195}
196
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200199/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201typedef struct {
202
203 /* attributes (a dictionary object), or None if no attributes */
204 PyObject* attrib;
205
206 /* child elements */
207 int length; /* actual number of items */
208 int allocated; /* allocated items */
209
210 /* this either points to _children or to a malloced buffer */
211 PyObject* *children;
212
213 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215} ElementObjectExtra;
216
217typedef struct {
218 PyObject_HEAD
219
220 /* element tag (a string). */
221 PyObject* tag;
222
223 /* text before first child. note that this is a tagged pointer;
224 use JOIN_OBJ to get the object pointer. the join flag is used
225 to distinguish lists created by the tree builder from lists
226 assigned to the attribute by application code; the former
227 should be joined before being returned to the user, the latter
228 should be left intact. */
229 PyObject* text;
230
231 /* text after this element, in parent. note that this is a tagged
232 pointer; use JOIN_OBJ to get the object pointer. */
233 PyObject* tail;
234
235 ElementObjectExtra* extra;
236
Eli Benderskyebf37a22012-04-03 22:02:37 +0300237 PyObject *weakreflist; /* For tp_weaklistoffset */
238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000239} ElementObject;
240
Neal Norwitz227b5332006-03-22 09:28:35 +0000241static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000242
Christian Heimes90aa7642007-12-19 02:45:37 +0000243#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
245/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200246/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
248LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200249create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250{
251 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
252 if (!self->extra)
253 return -1;
254
255 if (!attrib)
256 attrib = Py_None;
257
258 Py_INCREF(attrib);
259 self->extra->attrib = attrib;
260
261 self->extra->length = 0;
262 self->extra->allocated = STATIC_CHILDREN;
263 self->extra->children = self->extra->_children;
264
265 return 0;
266}
267
268LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270{
Eli Bendersky08b85292012-04-04 15:55:07 +0300271 ElementObjectExtra *myextra;
272 int i;
273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 if (!self->extra)
275 return;
276
277 /* Avoid DECREFs calling into this code again (cycles, etc.)
278 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300279 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 self->extra = NULL;
281
282 Py_DECREF(myextra->attrib);
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 for (i = 0; i < myextra->length; i++)
285 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286
Eli Benderskyebf37a22012-04-03 22:02:37 +0300287 if (myextra->children != myextra->_children)
288 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289
Eli Benderskyebf37a22012-04-03 22:02:37 +0300290 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291}
292
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293/* Convenience internal function to create new Element objects with the given
294 * tag and attributes.
295*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298{
299 ElementObject* self;
300
Eli Bendersky0192ba32012-03-30 16:38:33 +0300301 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000302 if (self == NULL)
303 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 self->extra = NULL;
305
Eli Bendersky48d358b2012-05-30 17:57:50 +0300306 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000308 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000310 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 }
312
313 Py_INCREF(tag);
314 self->tag = tag;
315
316 Py_INCREF(Py_None);
317 self->text = Py_None;
318
319 Py_INCREF(Py_None);
320 self->tail = Py_None;
321
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 self->weakreflist = NULL;
323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000324 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300325 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 return (PyObject*) self;
327}
328
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329static PyObject *
330element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
331{
332 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
333 if (e != NULL) {
334 Py_INCREF(Py_None);
335 e->tag = Py_None;
336
337 Py_INCREF(Py_None);
338 e->text = Py_None;
339
340 Py_INCREF(Py_None);
341 e->tail = Py_None;
342
343 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300344 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200345 }
346 return (PyObject *)e;
347}
348
Eli Bendersky737b1732012-05-29 06:02:56 +0300349/* Helper function for extracting the attrib dictionary from a keywords dict.
350 * This is required by some constructors/functions in this module that can
351 * either accept attrib as a keyword argument or all attributes splashed
352 * directly into *kwds.
353 * If there is no 'attrib' keyword, return an empty dict.
354 */
355static PyObject*
356get_attrib_from_keywords(PyObject *kwds)
357{
358 PyObject *attrib_str = PyUnicode_FromString("attrib");
359 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
360
361 if (attrib) {
362 /* If attrib was found in kwds, copy its value and remove it from
363 * kwds
364 */
365 if (!PyDict_Check(attrib)) {
366 Py_DECREF(attrib_str);
367 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
368 Py_TYPE(attrib)->tp_name);
369 return NULL;
370 }
371 attrib = PyDict_Copy(attrib);
372 PyDict_DelItem(kwds, attrib_str);
373 } else {
374 attrib = PyDict_New();
375 }
376
377 Py_DECREF(attrib_str);
378
379 if (attrib)
380 PyDict_Update(attrib, kwds);
381 return attrib;
382}
383
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384static int
385element_init(PyObject *self, PyObject *args, PyObject *kwds)
386{
387 PyObject *tag;
388 PyObject *tmp;
389 PyObject *attrib = NULL;
390 ElementObject *self_elem;
391
392 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
393 return -1;
394
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (attrib) {
396 /* attrib passed as positional arg */
397 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 if (!attrib)
399 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (kwds) {
401 if (PyDict_Update(attrib, kwds) < 0) {
402 return -1;
403 }
404 }
405 } else if (kwds) {
406 /* have keywords args */
407 attrib = get_attrib_from_keywords(kwds);
408 if (!attrib)
409 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300411 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_INCREF(Py_None);
413 attrib = Py_None;
414 }
415
416 self_elem = (ElementObject *)self;
417
Eli Bendersky48d358b2012-05-30 17:57:50 +0300418 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 if (create_extra(self_elem, attrib) < 0) {
420 PyObject_Del(self_elem);
421 return -1;
422 }
423 }
424
Eli Bendersky48d358b2012-05-30 17:57:50 +0300425 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(attrib);
427
428 /* Replace the objects already pointed to by tag, text and tail. */
429 tmp = self_elem->tag;
430 self_elem->tag = tag;
431 Py_INCREF(tag);
432 Py_DECREF(tmp);
433
434 tmp = self_elem->text;
435 self_elem->text = Py_None;
436 Py_INCREF(Py_None);
437 Py_DECREF(JOIN_OBJ(tmp));
438
439 tmp = self_elem->tail;
440 self_elem->tail = Py_None;
441 Py_INCREF(Py_None);
442 Py_DECREF(JOIN_OBJ(tmp));
443
444 return 0;
445}
446
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447LOCAL(int)
448element_resize(ElementObject* self, int extra)
449{
450 int size;
451 PyObject* *children;
452
453 /* make sure self->children can hold the given number of extra
454 elements. set an exception and return -1 if allocation failed */
455
456 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200457 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000458
459 size = self->extra->length + extra;
460
461 if (size > self->extra->allocated) {
462 /* use Python 2.4's list growth strategy */
463 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100465 * which needs at least 4 bytes.
466 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000467 * be safe.
468 */
469 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000470 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100472 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 * false alarm always assume at least one child to be safe.
474 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000475 children = PyObject_Realloc(self->extra->children,
476 size * sizeof(PyObject*));
477 if (!children)
478 goto nomemory;
479 } else {
480 children = PyObject_Malloc(size * sizeof(PyObject*));
481 if (!children)
482 goto nomemory;
483 /* copy existing children from static area to malloc buffer */
484 memcpy(children, self->extra->children,
485 self->extra->length * sizeof(PyObject*));
486 }
487 self->extra->children = children;
488 self->extra->allocated = size;
489 }
490
491 return 0;
492
493 nomemory:
494 PyErr_NoMemory();
495 return -1;
496}
497
498LOCAL(int)
499element_add_subelement(ElementObject* self, PyObject* element)
500{
501 /* add a child element to a parent */
502
503 if (element_resize(self, 1) < 0)
504 return -1;
505
506 Py_INCREF(element);
507 self->extra->children[self->extra->length] = element;
508
509 self->extra->length++;
510
511 return 0;
512}
513
514LOCAL(PyObject*)
515element_get_attrib(ElementObject* self)
516{
517 /* return borrowed reference to attrib dictionary */
518 /* note: this function assumes that the extra section exists */
519
520 PyObject* res = self->extra->attrib;
521
522 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000523 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524 /* create missing dictionary */
525 res = PyDict_New();
526 if (!res)
527 return NULL;
528 self->extra->attrib = res;
529 }
530
531 return res;
532}
533
534LOCAL(PyObject*)
535element_get_text(ElementObject* self)
536{
537 /* return borrowed reference to text attribute */
538
539 PyObject* res = self->text;
540
541 if (JOIN_GET(res)) {
542 res = JOIN_OBJ(res);
543 if (PyList_CheckExact(res)) {
544 res = list_join(res);
545 if (!res)
546 return NULL;
547 self->text = res;
548 }
549 }
550
551 return res;
552}
553
554LOCAL(PyObject*)
555element_get_tail(ElementObject* self)
556{
557 /* return borrowed reference to text attribute */
558
559 PyObject* res = self->tail;
560
561 if (JOIN_GET(res)) {
562 res = JOIN_OBJ(res);
563 if (PyList_CheckExact(res)) {
564 res = list_join(res);
565 if (!res)
566 return NULL;
567 self->tail = res;
568 }
569 }
570
571 return res;
572}
573
574static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300575subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576{
577 PyObject* elem;
578
579 ElementObject* parent;
580 PyObject* tag;
581 PyObject* attrib = NULL;
582 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
583 &Element_Type, &parent, &tag,
584 &PyDict_Type, &attrib))
585 return NULL;
586
Eli Bendersky737b1732012-05-29 06:02:56 +0300587 if (attrib) {
588 /* attrib passed as positional arg */
589 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 if (!attrib)
591 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300592 if (kwds) {
593 if (PyDict_Update(attrib, kwds) < 0) {
594 return NULL;
595 }
596 }
597 } else if (kwds) {
598 /* have keyword args */
599 attrib = get_attrib_from_keywords(kwds);
600 if (!attrib)
601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 Py_INCREF(Py_None);
605 attrib = Py_None;
606 }
607
Eli Bendersky092af1f2012-03-04 07:14:03 +0200608 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609
610 Py_DECREF(attrib);
611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
628 int i;
629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300641
642 /* The following is like Py_CLEAR for self->text and self->tail, but
643 * written explicitily because the real pointers hide behind access
644 * macros.
645 */
646 if (self->text) {
647 PyObject *tmp = JOIN_OBJ(self->text);
648 self->text = NULL;
649 Py_DECREF(tmp);
650 }
651
652 if (self->tail) {
653 PyObject *tmp = JOIN_OBJ(self->tail);
654 self->tail = NULL;
655 Py_DECREF(tmp);
656 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657
658 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300659 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300660 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 return 0;
663}
664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665static void
666element_dealloc(ElementObject* self)
667{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300669
670 if (self->weakreflist != NULL)
671 PyObject_ClearWeakRefs((PyObject *) self);
672
Eli Bendersky0192ba32012-03-30 16:38:33 +0300673 /* element_gc_clear clears all references and deallocates extra
674 */
675 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676
677 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200678 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679}
680
681/* -------------------------------------------------------------------- */
682/* methods (in alphabetical order) */
683
684static PyObject*
685element_append(ElementObject* self, PyObject* args)
686{
687 PyObject* element;
688 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
689 return NULL;
690
691 if (element_add_subelement(self, element) < 0)
692 return NULL;
693
694 Py_RETURN_NONE;
695}
696
697static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300698element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699{
700 if (!PyArg_ParseTuple(args, ":clear"))
701 return NULL;
702
Eli Benderskyebf37a22012-04-03 22:02:37 +0300703 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 Py_INCREF(Py_None);
706 Py_DECREF(JOIN_OBJ(self->text));
707 self->text = Py_None;
708
709 Py_INCREF(Py_None);
710 Py_DECREF(JOIN_OBJ(self->tail));
711 self->tail = Py_None;
712
713 Py_RETURN_NONE;
714}
715
716static PyObject*
717element_copy(ElementObject* self, PyObject* args)
718{
719 int i;
720 ElementObject* element;
721
722 if (!PyArg_ParseTuple(args, ":__copy__"))
723 return NULL;
724
Eli Bendersky092af1f2012-03-04 07:14:03 +0200725 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726 self->tag, (self->extra) ? self->extra->attrib : Py_None
727 );
728 if (!element)
729 return NULL;
730
731 Py_DECREF(JOIN_OBJ(element->text));
732 element->text = self->text;
733 Py_INCREF(JOIN_OBJ(element->text));
734
735 Py_DECREF(JOIN_OBJ(element->tail));
736 element->tail = self->tail;
737 Py_INCREF(JOIN_OBJ(element->tail));
738
739 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100740
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000741 if (element_resize(element, self->extra->length) < 0) {
742 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
746 for (i = 0; i < self->extra->length; i++) {
747 Py_INCREF(self->extra->children[i]);
748 element->extra->children[i] = self->extra->children[i];
749 }
750
751 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 }
754
755 return (PyObject*) element;
756}
757
758static PyObject*
759element_deepcopy(ElementObject* self, PyObject* args)
760{
761 int i;
762 ElementObject* element;
763 PyObject* tag;
764 PyObject* attrib;
765 PyObject* text;
766 PyObject* tail;
767 PyObject* id;
768
769 PyObject* memo;
770 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
771 return NULL;
772
773 tag = deepcopy(self->tag, memo);
774 if (!tag)
775 return NULL;
776
777 if (self->extra) {
778 attrib = deepcopy(self->extra->attrib, memo);
779 if (!attrib) {
780 Py_DECREF(tag);
781 return NULL;
782 }
783 } else {
784 Py_INCREF(Py_None);
785 attrib = Py_None;
786 }
787
Eli Bendersky092af1f2012-03-04 07:14:03 +0200788 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 Py_DECREF(tag);
791 Py_DECREF(attrib);
792
793 if (!element)
794 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 text = deepcopy(JOIN_OBJ(self->text), memo);
797 if (!text)
798 goto error;
799 Py_DECREF(element->text);
800 element->text = JOIN_SET(text, JOIN_GET(self->text));
801
802 tail = deepcopy(JOIN_OBJ(self->tail), memo);
803 if (!tail)
804 goto error;
805 Py_DECREF(element->tail);
806 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
807
808 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 if (element_resize(element, self->extra->length) < 0)
811 goto error;
812
813 for (i = 0; i < self->extra->length; i++) {
814 PyObject* child = deepcopy(self->extra->children[i], memo);
815 if (!child) {
816 element->extra->length = i;
817 goto error;
818 }
819 element->extra->children[i] = child;
820 }
821
822 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000827 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000828 if (!id)
829 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830
831 i = PyDict_SetItem(memo, id, (PyObject*) element);
832
833 Py_DECREF(id);
834
835 if (i < 0)
836 goto error;
837
838 return (PyObject*) element;
839
840 error:
841 Py_DECREF(element);
842 return NULL;
843}
844
845LOCAL(int)
846checkpath(PyObject* tag)
847{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000848 Py_ssize_t i;
849 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 /* check if a tag contains an xpath character */
852
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000853#define PATHCHAR(ch) \
854 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000855
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000856 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
858 void *data = PyUnicode_DATA(tag);
859 unsigned int kind = PyUnicode_KIND(tag);
860 for (i = 0; i < len; i++) {
861 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
862 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 return 1;
868 }
869 return 0;
870 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000871 if (PyBytes_Check(tag)) {
872 char *p = PyBytes_AS_STRING(tag);
873 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 if (p[i] == '{')
875 check = 0;
876 else if (p[i] == '}')
877 check = 1;
878 else if (check && PATHCHAR(p[i]))
879 return 1;
880 }
881 return 0;
882 }
883
884 return 1; /* unknown type; might be path expression */
885}
886
887static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000888element_extend(ElementObject* self, PyObject* args)
889{
890 PyObject* seq;
891 Py_ssize_t i, seqlen = 0;
892
893 PyObject* seq_in;
894 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
895 return NULL;
896
897 seq = PySequence_Fast(seq_in, "");
898 if (!seq) {
899 PyErr_Format(
900 PyExc_TypeError,
901 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
902 );
903 return NULL;
904 }
905
906 seqlen = PySequence_Size(seq);
907 for (i = 0; i < seqlen; i++) {
908 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200909 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
910 Py_DECREF(seq);
911 PyErr_Format(
912 PyExc_TypeError,
913 "expected an Element, not \"%.200s\"",
914 Py_TYPE(element)->tp_name);
915 return NULL;
916 }
917
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000918 if (element_add_subelement(self, element) < 0) {
919 Py_DECREF(seq);
920 return NULL;
921 }
922 }
923
924 Py_DECREF(seq);
925
926 Py_RETURN_NONE;
927}
928
929static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300930element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000931{
932 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000934 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300935 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936
Eli Bendersky737b1732012-05-29 06:02:56 +0300937 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
938 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 return NULL;
940
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200941 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200942 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200943 return _PyObject_CallMethodId(
944 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200946 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947
948 if (!self->extra)
949 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100950
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951 for (i = 0; i < self->extra->length; i++) {
952 PyObject* item = self->extra->children[i];
953 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000954 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000955 Py_INCREF(item);
956 return item;
957 }
958 }
959
960 Py_RETURN_NONE;
961}
962
963static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300964element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965{
966 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 PyObject* tag;
968 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200970 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300971 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200972
Eli Bendersky737b1732012-05-29 06:02:56 +0300973 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
974 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975 return NULL;
976
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000977 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200978 return _PyObject_CallMethodId(
979 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 );
981
982 if (!self->extra) {
983 Py_INCREF(default_value);
984 return default_value;
985 }
986
987 for (i = 0; i < self->extra->length; i++) {
988 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000989 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
990
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991 PyObject* text = element_get_text(item);
992 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000993 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000994 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 return text;
996 }
997 }
998
999 Py_INCREF(default_value);
1000 return default_value;
1001}
1002
1003static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001004element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005{
1006 int i;
1007 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001009 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001010 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001011
Eli Bendersky737b1732012-05-29 06:02:56 +03001012 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1013 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001014 return NULL;
1015
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001016 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001017 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001018 return _PyObject_CallMethodId(
1019 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001021 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022
1023 out = PyList_New(0);
1024 if (!out)
1025 return NULL;
1026
1027 if (!self->extra)
1028 return out;
1029
1030 for (i = 0; i < self->extra->length; i++) {
1031 PyObject* item = self->extra->children[i];
1032 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001033 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034 if (PyList_Append(out, item) < 0) {
1035 Py_DECREF(out);
1036 return NULL;
1037 }
1038 }
1039 }
1040
1041 return out;
1042}
1043
1044static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001045element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001046{
1047 PyObject* tag;
1048 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001049 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001050 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001051
Eli Bendersky737b1732012-05-29 06:02:56 +03001052 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1053 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054 return NULL;
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 return _PyObject_CallMethodId(
1057 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001058 );
1059}
1060
1061static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062element_get(ElementObject* self, PyObject* args)
1063{
1064 PyObject* value;
1065
1066 PyObject* key;
1067 PyObject* default_value = Py_None;
1068 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1069 return NULL;
1070
1071 if (!self->extra || self->extra->attrib == Py_None)
1072 value = default_value;
1073 else {
1074 value = PyDict_GetItem(self->extra->attrib, key);
1075 if (!value)
1076 value = default_value;
1077 }
1078
1079 Py_INCREF(value);
1080 return value;
1081}
1082
1083static PyObject*
1084element_getchildren(ElementObject* self, PyObject* args)
1085{
1086 int i;
1087 PyObject* list;
1088
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001089 /* FIXME: report as deprecated? */
1090
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 if (!PyArg_ParseTuple(args, ":getchildren"))
1092 return NULL;
1093
1094 if (!self->extra)
1095 return PyList_New(0);
1096
1097 list = PyList_New(self->extra->length);
1098 if (!list)
1099 return NULL;
1100
1101 for (i = 0; i < self->extra->length; i++) {
1102 PyObject* item = self->extra->children[i];
1103 Py_INCREF(item);
1104 PyList_SET_ITEM(list, i, item);
1105 }
1106
1107 return list;
1108}
1109
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001110
Eli Bendersky64d11e62012-06-15 07:42:50 +03001111static PyObject *
1112create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1113
1114
1115static PyObject *
1116element_iter(ElementObject *self, PyObject *args)
1117{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120 return NULL;
1121
Eli Bendersky64d11e62012-06-15 07:42:50 +03001122 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123}
1124
1125
1126static PyObject*
1127element_itertext(ElementObject* self, PyObject* args)
1128{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001129 if (!PyArg_ParseTuple(args, ":itertext"))
1130 return NULL;
1131
Eli Bendersky64d11e62012-06-15 07:42:50 +03001132 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133}
1134
Eli Bendersky64d11e62012-06-15 07:42:50 +03001135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001137element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001139 ElementObject* self = (ElementObject*) self_;
1140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 if (!self->extra || index < 0 || index >= self->extra->length) {
1142 PyErr_SetString(
1143 PyExc_IndexError,
1144 "child index out of range"
1145 );
1146 return NULL;
1147 }
1148
1149 Py_INCREF(self->extra->children[index]);
1150 return self->extra->children[index];
1151}
1152
1153static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154element_insert(ElementObject* self, PyObject* args)
1155{
1156 int i;
1157
1158 int index;
1159 PyObject* element;
1160 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1161 &Element_Type, &element))
1162 return NULL;
1163
1164 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001165 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001167 if (index < 0) {
1168 index += self->extra->length;
1169 if (index < 0)
1170 index = 0;
1171 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 if (index > self->extra->length)
1173 index = self->extra->length;
1174
1175 if (element_resize(self, 1) < 0)
1176 return NULL;
1177
1178 for (i = self->extra->length; i > index; i--)
1179 self->extra->children[i] = self->extra->children[i-1];
1180
1181 Py_INCREF(element);
1182 self->extra->children[index] = element;
1183
1184 self->extra->length++;
1185
1186 Py_RETURN_NONE;
1187}
1188
1189static PyObject*
1190element_items(ElementObject* self, PyObject* args)
1191{
1192 if (!PyArg_ParseTuple(args, ":items"))
1193 return NULL;
1194
1195 if (!self->extra || self->extra->attrib == Py_None)
1196 return PyList_New(0);
1197
1198 return PyDict_Items(self->extra->attrib);
1199}
1200
1201static PyObject*
1202element_keys(ElementObject* self, PyObject* args)
1203{
1204 if (!PyArg_ParseTuple(args, ":keys"))
1205 return NULL;
1206
1207 if (!self->extra || self->extra->attrib == Py_None)
1208 return PyList_New(0);
1209
1210 return PyDict_Keys(self->extra->attrib);
1211}
1212
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214element_length(ElementObject* self)
1215{
1216 if (!self->extra)
1217 return 0;
1218
1219 return self->extra->length;
1220}
1221
1222static PyObject*
1223element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1224{
1225 PyObject* elem;
1226
1227 PyObject* tag;
1228 PyObject* attrib;
1229 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1230 return NULL;
1231
1232 attrib = PyDict_Copy(attrib);
1233 if (!attrib)
1234 return NULL;
1235
Eli Bendersky092af1f2012-03-04 07:14:03 +02001236 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237
1238 Py_DECREF(attrib);
1239
1240 return elem;
1241}
1242
1243static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244element_remove(ElementObject* self, PyObject* args)
1245{
1246 int i;
1247
1248 PyObject* element;
1249 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1250 return NULL;
1251
1252 if (!self->extra) {
1253 /* element has no children, so raise exception */
1254 PyErr_SetString(
1255 PyExc_ValueError,
1256 "list.remove(x): x not in list"
1257 );
1258 return NULL;
1259 }
1260
1261 for (i = 0; i < self->extra->length; i++) {
1262 if (self->extra->children[i] == element)
1263 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001264 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 break;
1266 }
1267
1268 if (i == self->extra->length) {
1269 /* element is not in children, so raise exception */
1270 PyErr_SetString(
1271 PyExc_ValueError,
1272 "list.remove(x): x not in list"
1273 );
1274 return NULL;
1275 }
1276
1277 Py_DECREF(self->extra->children[i]);
1278
1279 self->extra->length--;
1280
1281 for (; i < self->extra->length; i++)
1282 self->extra->children[i] = self->extra->children[i+1];
1283
1284 Py_RETURN_NONE;
1285}
1286
1287static PyObject*
1288element_repr(ElementObject* self)
1289{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001290 if (self->tag)
1291 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1292 else
1293 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001294}
1295
1296static PyObject*
1297element_set(ElementObject* self, PyObject* args)
1298{
1299 PyObject* attrib;
1300
1301 PyObject* key;
1302 PyObject* value;
1303 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1304 return NULL;
1305
1306 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001307 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308
1309 attrib = element_get_attrib(self);
1310 if (!attrib)
1311 return NULL;
1312
1313 if (PyDict_SetItem(attrib, key, value) < 0)
1314 return NULL;
1315
1316 Py_RETURN_NONE;
1317}
1318
1319static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001320element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001322 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 int i;
1324 PyObject* old;
1325
1326 if (!self->extra || index < 0 || index >= self->extra->length) {
1327 PyErr_SetString(
1328 PyExc_IndexError,
1329 "child assignment index out of range");
1330 return -1;
1331 }
1332
1333 old = self->extra->children[index];
1334
1335 if (item) {
1336 Py_INCREF(item);
1337 self->extra->children[index] = item;
1338 } else {
1339 self->extra->length--;
1340 for (i = index; i < self->extra->length; i++)
1341 self->extra->children[i] = self->extra->children[i+1];
1342 }
1343
1344 Py_DECREF(old);
1345
1346 return 0;
1347}
1348
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001349static PyObject*
1350element_subscr(PyObject* self_, PyObject* item)
1351{
1352 ElementObject* self = (ElementObject*) self_;
1353
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001354 if (PyIndex_Check(item)) {
1355 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001356
1357 if (i == -1 && PyErr_Occurred()) {
1358 return NULL;
1359 }
1360 if (i < 0 && self->extra)
1361 i += self->extra->length;
1362 return element_getitem(self_, i);
1363 }
1364 else if (PySlice_Check(item)) {
1365 Py_ssize_t start, stop, step, slicelen, cur, i;
1366 PyObject* list;
1367
1368 if (!self->extra)
1369 return PyList_New(0);
1370
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001371 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001372 self->extra->length,
1373 &start, &stop, &step, &slicelen) < 0) {
1374 return NULL;
1375 }
1376
1377 if (slicelen <= 0)
1378 return PyList_New(0);
1379 else {
1380 list = PyList_New(slicelen);
1381 if (!list)
1382 return NULL;
1383
1384 for (cur = start, i = 0; i < slicelen;
1385 cur += step, i++) {
1386 PyObject* item = self->extra->children[cur];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392 }
1393 }
1394 else {
1395 PyErr_SetString(PyExc_TypeError,
1396 "element indices must be integers");
1397 return NULL;
1398 }
1399}
1400
1401static int
1402element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403{
1404 ElementObject* self = (ElementObject*) self_;
1405
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001406 if (PyIndex_Check(item)) {
1407 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001408
1409 if (i == -1 && PyErr_Occurred()) {
1410 return -1;
1411 }
1412 if (i < 0 && self->extra)
1413 i += self->extra->length;
1414 return element_setitem(self_, i, value);
1415 }
1416 else if (PySlice_Check(item)) {
1417 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1418
1419 PyObject* recycle = NULL;
1420 PyObject* seq = NULL;
1421
1422 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001423 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001424
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001425 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001426 self->extra->length,
1427 &start, &stop, &step, &slicelen) < 0) {
1428 return -1;
1429 }
1430
Eli Bendersky865756a2012-03-09 13:38:15 +02001431 if (value == NULL) {
1432 /* Delete slice */
1433 size_t cur;
1434 Py_ssize_t i;
1435
1436 if (slicelen <= 0)
1437 return 0;
1438
1439 /* Since we're deleting, the direction of the range doesn't matter,
1440 * so for simplicity make it always ascending.
1441 */
1442 if (step < 0) {
1443 stop = start + 1;
1444 start = stop + step * (slicelen - 1) - 1;
1445 step = -step;
1446 }
1447
1448 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1449
1450 /* recycle is a list that will contain all the children
1451 * scheduled for removal.
1452 */
1453 if (!(recycle = PyList_New(slicelen))) {
1454 PyErr_NoMemory();
1455 return -1;
1456 }
1457
1458 /* This loop walks over all the children that have to be deleted,
1459 * with cur pointing at them. num_moved is the amount of children
1460 * until the next deleted child that have to be "shifted down" to
1461 * occupy the deleted's places.
1462 * Note that in the ith iteration, shifting is done i+i places down
1463 * because i children were already removed.
1464 */
1465 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1466 /* Compute how many children have to be moved, clipping at the
1467 * list end.
1468 */
1469 Py_ssize_t num_moved = step - 1;
1470 if (cur + step >= (size_t)self->extra->length) {
1471 num_moved = self->extra->length - cur - 1;
1472 }
1473
1474 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1475
1476 memmove(
1477 self->extra->children + cur - i,
1478 self->extra->children + cur + 1,
1479 num_moved * sizeof(PyObject *));
1480 }
1481
1482 /* Leftover "tail" after the last removed child */
1483 cur = start + (size_t)slicelen * step;
1484 if (cur < (size_t)self->extra->length) {
1485 memmove(
1486 self->extra->children + cur - slicelen,
1487 self->extra->children + cur,
1488 (self->extra->length - cur) * sizeof(PyObject *));
1489 }
1490
1491 self->extra->length -= slicelen;
1492
1493 /* Discard the recycle list with all the deleted sub-elements */
1494 Py_XDECREF(recycle);
1495 return 0;
1496 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001498 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001499 seq = PySequence_Fast(value, "");
1500 if (!seq) {
1501 PyErr_Format(
1502 PyExc_TypeError,
1503 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1504 );
1505 return -1;
1506 }
1507 newlen = PySequence_Size(seq);
1508 }
1509
1510 if (step != 1 && newlen != slicelen)
1511 {
1512 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001513 "attempt to assign sequence of size %zd "
1514 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001515 newlen, slicelen
1516 );
1517 return -1;
1518 }
1519
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001520 /* Resize before creating the recycle bin, to prevent refleaks. */
1521 if (newlen > slicelen) {
1522 if (element_resize(self, newlen - slicelen) < 0) {
1523 if (seq) {
1524 Py_DECREF(seq);
1525 }
1526 return -1;
1527 }
1528 }
1529
1530 if (slicelen > 0) {
1531 /* to avoid recursive calls to this method (via decref), move
1532 old items to the recycle bin here, and get rid of them when
1533 we're done modifying the element */
1534 recycle = PyList_New(slicelen);
1535 if (!recycle) {
1536 if (seq) {
1537 Py_DECREF(seq);
1538 }
1539 return -1;
1540 }
1541 for (cur = start, i = 0; i < slicelen;
1542 cur += step, i++)
1543 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1544 }
1545
1546 if (newlen < slicelen) {
1547 /* delete slice */
1548 for (i = stop; i < self->extra->length; i++)
1549 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1550 } else if (newlen > slicelen) {
1551 /* insert slice */
1552 for (i = self->extra->length-1; i >= stop; i--)
1553 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1554 }
1555
1556 /* replace the slice */
1557 for (cur = start, i = 0; i < newlen;
1558 cur += step, i++) {
1559 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1560 Py_INCREF(element);
1561 self->extra->children[cur] = element;
1562 }
1563
1564 self->extra->length += newlen - slicelen;
1565
1566 if (seq) {
1567 Py_DECREF(seq);
1568 }
1569
1570 /* discard the recycle bin, and everything in it */
1571 Py_XDECREF(recycle);
1572
1573 return 0;
1574 }
1575 else {
1576 PyErr_SetString(PyExc_TypeError,
1577 "element indices must be integers");
1578 return -1;
1579 }
1580}
1581
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582static PyMethodDef element_methods[] = {
1583
Eli Bendersky0192ba32012-03-30 16:38:33 +03001584 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585
1586 {"get", (PyCFunction) element_get, METH_VARARGS},
1587 {"set", (PyCFunction) element_set, METH_VARARGS},
1588
Eli Bendersky737b1732012-05-29 06:02:56 +03001589 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1590 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1591 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592
1593 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1596 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1597
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001598 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1599 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001600 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001601
1602 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1604
1605 {"items", (PyCFunction) element_items, METH_VARARGS},
1606 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1607
1608 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1609
1610 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1611 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1612
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 {NULL, NULL}
1614};
1615
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001617element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618{
1619 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001620 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001621
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001622 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001623 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001624
Alexander Belopolskye239d232010-12-08 23:31:48 +00001625 if (name == NULL)
1626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001628 /* handle common attributes first */
1629 if (strcmp(name, "tag") == 0) {
1630 res = self->tag;
1631 Py_INCREF(res);
1632 return res;
1633 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001635 Py_INCREF(res);
1636 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 }
1638
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001639 /* methods */
1640 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1641 if (res)
1642 return res;
1643
1644 /* less common attributes */
1645 if (strcmp(name, "tail") == 0) {
1646 PyErr_Clear();
1647 res = element_get_tail(self);
1648 } else if (strcmp(name, "attrib") == 0) {
1649 PyErr_Clear();
1650 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001651 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001652 res = element_get_attrib(self);
1653 }
1654
1655 if (!res)
1656 return NULL;
1657
1658 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001659 return res;
1660}
1661
Eli Benderskyb20df952012-05-20 06:33:29 +03001662static PyObject*
1663element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664{
Eli Benderskyb20df952012-05-20 06:33:29 +03001665 char *name = "";
1666 if (PyUnicode_Check(nameobj))
1667 name = _PyUnicode_AsString(nameobj);
1668
1669 if (name == NULL)
1670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
1672 if (strcmp(name, "tag") == 0) {
1673 Py_DECREF(self->tag);
1674 self->tag = value;
1675 Py_INCREF(self->tag);
1676 } else if (strcmp(name, "text") == 0) {
1677 Py_DECREF(JOIN_OBJ(self->text));
1678 self->text = value;
1679 Py_INCREF(self->text);
1680 } else if (strcmp(name, "tail") == 0) {
1681 Py_DECREF(JOIN_OBJ(self->tail));
1682 self->tail = value;
1683 Py_INCREF(self->tail);
1684 } else if (strcmp(name, "attrib") == 0) {
1685 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001686 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687 Py_DECREF(self->extra->attrib);
1688 self->extra->attrib = value;
1689 Py_INCREF(self->extra->attrib);
1690 } else {
1691 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001692 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 }
1694
Eli Benderskyb20df952012-05-20 06:33:29 +03001695 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001696}
1697
1698static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001699 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700 0, /* sq_concat */
1701 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001702 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001703 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001704 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 0,
1706};
1707
1708static PyMappingMethods element_as_mapping = {
1709 (lenfunc) element_length,
1710 (binaryfunc) element_subscr,
1711 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001712};
1713
Neal Norwitz227b5332006-03-22 09:28:35 +00001714static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001715 PyVarObject_HEAD_INIT(NULL, 0)
1716 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001718 (destructor)element_dealloc, /* tp_dealloc */
1719 0, /* tp_print */
1720 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001721 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001722 0, /* tp_reserved */
1723 (reprfunc)element_repr, /* tp_repr */
1724 0, /* tp_as_number */
1725 &element_as_sequence, /* tp_as_sequence */
1726 &element_as_mapping, /* tp_as_mapping */
1727 0, /* tp_hash */
1728 0, /* tp_call */
1729 0, /* tp_str */
1730 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001731 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001732 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001733 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1734 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001735 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001736 (traverseproc)element_gc_traverse, /* tp_traverse */
1737 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001738 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001739 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001740 0, /* tp_iter */
1741 0, /* tp_iternext */
1742 element_methods, /* tp_methods */
1743 0, /* tp_members */
1744 0, /* tp_getset */
1745 0, /* tp_base */
1746 0, /* tp_dict */
1747 0, /* tp_descr_get */
1748 0, /* tp_descr_set */
1749 0, /* tp_dictoffset */
1750 (initproc)element_init, /* tp_init */
1751 PyType_GenericAlloc, /* tp_alloc */
1752 element_new, /* tp_new */
1753 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754};
1755
Eli Bendersky64d11e62012-06-15 07:42:50 +03001756/******************************* Element iterator ****************************/
1757
1758/* ElementIterObject represents the iteration state over an XML element in
1759 * pre-order traversal. To keep track of which sub-element should be returned
1760 * next, a stack of parents is maintained. This is a standard stack-based
1761 * iterative pre-order traversal of a tree.
1762 * The stack is managed using a single-linked list starting at parent_stack.
1763 * Each stack node contains the saved parent to which we should return after
1764 * the current one is exhausted, and the next child to examine in that parent.
1765 */
1766typedef struct ParentLocator_t {
1767 ElementObject *parent;
1768 Py_ssize_t child_index;
1769 struct ParentLocator_t *next;
1770} ParentLocator;
1771
1772typedef struct {
1773 PyObject_HEAD
1774 ParentLocator *parent_stack;
1775 ElementObject *root_element;
1776 PyObject *sought_tag;
1777 int root_done;
1778 int gettext;
1779} ElementIterObject;
1780
1781
1782static void
1783elementiter_dealloc(ElementIterObject *it)
1784{
1785 ParentLocator *p = it->parent_stack;
1786 while (p) {
1787 ParentLocator *temp = p;
1788 Py_XDECREF(p->parent);
1789 p = p->next;
1790 PyObject_Free(temp);
1791 }
1792
1793 Py_XDECREF(it->sought_tag);
1794 Py_XDECREF(it->root_element);
1795
1796 PyObject_GC_UnTrack(it);
1797 PyObject_GC_Del(it);
1798}
1799
1800static int
1801elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1802{
1803 ParentLocator *p = it->parent_stack;
1804 while (p) {
1805 Py_VISIT(p->parent);
1806 p = p->next;
1807 }
1808
1809 Py_VISIT(it->root_element);
1810 Py_VISIT(it->sought_tag);
1811 return 0;
1812}
1813
1814/* Helper function for elementiter_next. Add a new parent to the parent stack.
1815 */
1816static ParentLocator *
1817parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1818{
1819 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1820 if (new_node) {
1821 new_node->parent = parent;
1822 Py_INCREF(parent);
1823 new_node->child_index = 0;
1824 new_node->next = stack;
1825 }
1826 return new_node;
1827}
1828
1829static PyObject *
1830elementiter_next(ElementIterObject *it)
1831{
1832 /* Sub-element iterator.
1833 *
1834 * A short note on gettext: this function serves both the iter() and
1835 * itertext() methods to avoid code duplication. However, there are a few
1836 * small differences in the way these iterations work. Namely:
1837 * - itertext() only yields text from nodes that have it, and continues
1838 * iterating when a node doesn't have text (so it doesn't return any
1839 * node like iter())
1840 * - itertext() also has to handle tail, after finishing with all the
1841 * children of a node.
1842 */
1843
1844 while (1) {
1845 /* Handle the case reached in the beginning and end of iteration, where
1846 * the parent stack is empty. The root_done flag gives us indication
1847 * whether we've just started iterating (so root_done is 0), in which
1848 * case the root is returned. If root_done is 1 and we're here, the
1849 * iterator is exhausted.
1850 */
1851 if (!it->parent_stack->parent) {
1852 if (it->root_done) {
1853 PyErr_SetNone(PyExc_StopIteration);
1854 return NULL;
1855 } else {
1856 it->parent_stack = parent_stack_push_new(it->parent_stack,
1857 it->root_element);
1858 if (!it->parent_stack) {
1859 PyErr_NoMemory();
1860 return NULL;
1861 }
1862
1863 it->root_done = 1;
1864 if (it->sought_tag == Py_None ||
1865 PyObject_RichCompareBool(it->root_element->tag,
1866 it->sought_tag, Py_EQ) == 1) {
1867 if (it->gettext) {
1868 PyObject *text = JOIN_OBJ(it->root_element->text);
1869 if (PyObject_IsTrue(text)) {
1870 Py_INCREF(text);
1871 return text;
1872 }
1873 } else {
1874 Py_INCREF(it->root_element);
1875 return (PyObject *)it->root_element;
1876 }
1877 }
1878 }
1879 }
1880
1881 /* See if there are children left to traverse in the current parent. If
1882 * yes, visit the next child. If not, pop the stack and try again.
1883 */
1884 ElementObject *cur_parent = it->parent_stack->parent;
1885 Py_ssize_t child_index = it->parent_stack->child_index;
1886 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1887 ElementObject *child = (ElementObject *)
1888 cur_parent->extra->children[child_index];
1889 it->parent_stack->child_index++;
1890 it->parent_stack = parent_stack_push_new(it->parent_stack,
1891 child);
1892 if (!it->parent_stack) {
1893 PyErr_NoMemory();
1894 return NULL;
1895 }
1896
1897 if (it->gettext) {
1898 PyObject *text = JOIN_OBJ(child->text);
1899 if (PyObject_IsTrue(text)) {
1900 Py_INCREF(text);
1901 return text;
1902 }
1903 } else if (it->sought_tag == Py_None ||
1904 PyObject_RichCompareBool(child->tag,
1905 it->sought_tag, Py_EQ) == 1) {
1906 Py_INCREF(child);
1907 return (PyObject *)child;
1908 }
1909 else
1910 continue;
1911 }
1912 else {
1913 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1914 ParentLocator *next = it->parent_stack->next;
1915 Py_XDECREF(it->parent_stack->parent);
1916 PyObject_Free(it->parent_stack);
1917 it->parent_stack = next;
1918
1919 /* Note that extra condition on it->parent_stack->parent here;
1920 * this is because itertext() is supposed to only return *inner*
1921 * text, not text following the element it began iteration with.
1922 */
1923 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1924 Py_INCREF(tail);
1925 return tail;
1926 }
1927 }
1928 }
1929
1930 return NULL;
1931}
1932
1933
1934static PyTypeObject ElementIter_Type = {
1935 PyVarObject_HEAD_INIT(NULL, 0)
1936 "_elementtree._element_iterator", /* tp_name */
1937 sizeof(ElementIterObject), /* tp_basicsize */
1938 0, /* tp_itemsize */
1939 /* methods */
1940 (destructor)elementiter_dealloc, /* tp_dealloc */
1941 0, /* tp_print */
1942 0, /* tp_getattr */
1943 0, /* tp_setattr */
1944 0, /* tp_reserved */
1945 0, /* tp_repr */
1946 0, /* tp_as_number */
1947 0, /* tp_as_sequence */
1948 0, /* tp_as_mapping */
1949 0, /* tp_hash */
1950 0, /* tp_call */
1951 0, /* tp_str */
1952 0, /* tp_getattro */
1953 0, /* tp_setattro */
1954 0, /* tp_as_buffer */
1955 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1956 0, /* tp_doc */
1957 (traverseproc)elementiter_traverse, /* tp_traverse */
1958 0, /* tp_clear */
1959 0, /* tp_richcompare */
1960 0, /* tp_weaklistoffset */
1961 PyObject_SelfIter, /* tp_iter */
1962 (iternextfunc)elementiter_next, /* tp_iternext */
1963 0, /* tp_methods */
1964 0, /* tp_members */
1965 0, /* tp_getset */
1966 0, /* tp_base */
1967 0, /* tp_dict */
1968 0, /* tp_descr_get */
1969 0, /* tp_descr_set */
1970 0, /* tp_dictoffset */
1971 0, /* tp_init */
1972 0, /* tp_alloc */
1973 0, /* tp_new */
1974};
1975
1976
1977static PyObject *
1978create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1979{
1980 ElementIterObject *it;
1981 PyObject *star = NULL;
1982
1983 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
1984 if (!it)
1985 return NULL;
1986 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
1987 PyObject_GC_Del(it);
1988 return NULL;
1989 }
1990
1991 it->parent_stack->parent = NULL;
1992 it->parent_stack->child_index = 0;
1993 it->parent_stack->next = NULL;
1994
1995 if (PyUnicode_Check(tag))
1996 star = PyUnicode_FromString("*");
1997 else if (PyBytes_Check(tag))
1998 star = PyBytes_FromString("*");
1999
2000 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2001 tag = Py_None;
2002
2003 Py_XDECREF(star);
2004 it->sought_tag = tag;
2005 it->root_done = 0;
2006 it->gettext = gettext;
2007 it->root_element = self;
2008
2009 Py_INCREF(self);
2010 Py_INCREF(tag);
2011
2012 PyObject_GC_Track(it);
2013 return (PyObject *)it;
2014}
2015
2016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002017/* ==================================================================== */
2018/* the tree builder type */
2019
2020typedef struct {
2021 PyObject_HEAD
2022
Eli Bendersky58d548d2012-05-29 15:45:16 +03002023 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002024
Eli Bendersky58d548d2012-05-29 15:45:16 +03002025 ElementObject *this; /* current node */
2026 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002027
Eli Bendersky58d548d2012-05-29 15:45:16 +03002028 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002029
Eli Bendersky58d548d2012-05-29 15:45:16 +03002030 PyObject *stack; /* element stack */
2031 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002032
Eli Bendersky48d358b2012-05-30 17:57:50 +03002033 PyObject *element_factory;
2034
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002035 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002036 PyObject *events; /* list of events, or NULL if not collecting */
2037 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2038 PyObject *end_event_obj;
2039 PyObject *start_ns_event_obj;
2040 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002041} TreeBuilderObject;
2042
Neal Norwitz227b5332006-03-22 09:28:35 +00002043static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002044
Christian Heimes90aa7642007-12-19 02:45:37 +00002045#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002046
2047/* -------------------------------------------------------------------- */
2048/* constructor and destructor */
2049
Eli Bendersky58d548d2012-05-29 15:45:16 +03002050static PyObject *
2051treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002052{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002053 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2054 if (t != NULL) {
2055 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002056
Eli Bendersky58d548d2012-05-29 15:45:16 +03002057 Py_INCREF(Py_None);
2058 t->this = (ElementObject *)Py_None;
2059 Py_INCREF(Py_None);
2060 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002061
Eli Bendersky58d548d2012-05-29 15:45:16 +03002062 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002063 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002064 t->stack = PyList_New(20);
2065 if (!t->stack) {
2066 Py_DECREF(t->this);
2067 Py_DECREF(t->last);
2068 return NULL;
2069 }
2070 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002071
Eli Bendersky58d548d2012-05-29 15:45:16 +03002072 t->events = NULL;
2073 t->start_event_obj = t->end_event_obj = NULL;
2074 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2075 }
2076 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002077}
2078
Eli Bendersky58d548d2012-05-29 15:45:16 +03002079static int
2080treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002081{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002082 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002083 PyObject *element_factory = NULL;
2084 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
2085
2086 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2087 &element_factory)) {
2088 return -1;
2089 }
2090
2091 if (element_factory) {
2092 Py_INCREF(element_factory);
2093 Py_XDECREF(self_tb->element_factory);
2094 self_tb->element_factory = element_factory;
2095 }
2096
Eli Bendersky58d548d2012-05-29 15:45:16 +03002097 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002098}
2099
Eli Bendersky48d358b2012-05-30 17:57:50 +03002100static int
2101treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2102{
2103 Py_VISIT(self->root);
2104 Py_VISIT(self->this);
2105 Py_VISIT(self->last);
2106 Py_VISIT(self->data);
2107 Py_VISIT(self->stack);
2108 Py_VISIT(self->element_factory);
2109 return 0;
2110}
2111
2112static int
2113treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002114{
2115 Py_XDECREF(self->end_ns_event_obj);
2116 Py_XDECREF(self->start_ns_event_obj);
2117 Py_XDECREF(self->end_event_obj);
2118 Py_XDECREF(self->start_event_obj);
2119 Py_XDECREF(self->events);
2120 Py_DECREF(self->stack);
2121 Py_XDECREF(self->data);
2122 Py_DECREF(self->last);
2123 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002124 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002125 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002126 return 0;
2127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002128
Eli Bendersky48d358b2012-05-30 17:57:50 +03002129static void
2130treebuilder_dealloc(TreeBuilderObject *self)
2131{
2132 PyObject_GC_UnTrack(self);
2133 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002134 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002135}
2136
2137/* -------------------------------------------------------------------- */
2138/* handlers */
2139
2140LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002141treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2142 PyObject* attrib)
2143{
2144 PyObject* node;
2145 PyObject* this;
2146
2147 if (self->data) {
2148 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002149 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002150 self->last->text = JOIN_SET(
2151 self->data, PyList_CheckExact(self->data)
2152 );
2153 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002154 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002155 self->last->tail = JOIN_SET(
2156 self->data, PyList_CheckExact(self->data)
2157 );
2158 }
2159 self->data = NULL;
2160 }
2161
Eli Bendersky48d358b2012-05-30 17:57:50 +03002162 if (self->element_factory) {
2163 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2164 } else {
2165 node = create_new_element(tag, attrib);
2166 }
2167 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002168 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002169 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170
2171 this = (PyObject*) self->this;
2172
2173 if (this != Py_None) {
2174 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002175 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002176 } else {
2177 if (self->root) {
2178 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002179 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002180 "multiple elements on top level"
2181 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002182 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002183 }
2184 Py_INCREF(node);
2185 self->root = node;
2186 }
2187
2188 if (self->index < PyList_GET_SIZE(self->stack)) {
2189 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002190 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002191 Py_INCREF(this);
2192 } else {
2193 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002194 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002195 }
2196 self->index++;
2197
2198 Py_DECREF(this);
2199 Py_INCREF(node);
2200 self->this = (ElementObject*) node;
2201
2202 Py_DECREF(self->last);
2203 Py_INCREF(node);
2204 self->last = (ElementObject*) node;
2205
2206 if (self->start_event_obj) {
2207 PyObject* res;
2208 PyObject* action = self->start_event_obj;
2209 res = PyTuple_New(2);
2210 if (res) {
2211 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2212 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2213 PyList_Append(self->events, res);
2214 Py_DECREF(res);
2215 } else
2216 PyErr_Clear(); /* FIXME: propagate error */
2217 }
2218
2219 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002220
2221 error:
2222 Py_DECREF(node);
2223 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002224}
2225
2226LOCAL(PyObject*)
2227treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2228{
2229 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002230 if (self->last == (ElementObject*) Py_None) {
2231 /* ignore calls to data before the first call to start */
2232 Py_RETURN_NONE;
2233 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002234 /* store the first item as is */
2235 Py_INCREF(data); self->data = data;
2236 } else {
2237 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002238 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2239 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240 /* expat often generates single character data sections; handle
2241 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002242 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2243 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002244 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002245 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246 } else if (PyList_CheckExact(self->data)) {
2247 if (PyList_Append(self->data, data) < 0)
2248 return NULL;
2249 } else {
2250 PyObject* list = PyList_New(2);
2251 if (!list)
2252 return NULL;
2253 PyList_SET_ITEM(list, 0, self->data);
2254 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2255 self->data = list;
2256 }
2257 }
2258
2259 Py_RETURN_NONE;
2260}
2261
2262LOCAL(PyObject*)
2263treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2264{
2265 PyObject* item;
2266
2267 if (self->data) {
2268 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002269 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002270 self->last->text = JOIN_SET(
2271 self->data, PyList_CheckExact(self->data)
2272 );
2273 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002274 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275 self->last->tail = JOIN_SET(
2276 self->data, PyList_CheckExact(self->data)
2277 );
2278 }
2279 self->data = NULL;
2280 }
2281
2282 if (self->index == 0) {
2283 PyErr_SetString(
2284 PyExc_IndexError,
2285 "pop from empty stack"
2286 );
2287 return NULL;
2288 }
2289
2290 self->index--;
2291
2292 item = PyList_GET_ITEM(self->stack, self->index);
2293 Py_INCREF(item);
2294
2295 Py_DECREF(self->last);
2296
2297 self->last = (ElementObject*) self->this;
2298 self->this = (ElementObject*) item;
2299
2300 if (self->end_event_obj) {
2301 PyObject* res;
2302 PyObject* action = self->end_event_obj;
2303 PyObject* node = (PyObject*) self->last;
2304 res = PyTuple_New(2);
2305 if (res) {
2306 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2307 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2308 PyList_Append(self->events, res);
2309 Py_DECREF(res);
2310 } else
2311 PyErr_Clear(); /* FIXME: propagate error */
2312 }
2313
2314 Py_INCREF(self->last);
2315 return (PyObject*) self->last;
2316}
2317
2318LOCAL(void)
2319treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002320 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321{
2322 PyObject* res;
2323 PyObject* action;
2324 PyObject* parcel;
2325
2326 if (!self->events)
2327 return;
2328
2329 if (start) {
2330 if (!self->start_ns_event_obj)
2331 return;
2332 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002333 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334 if (!parcel)
2335 return;
2336 Py_INCREF(action);
2337 } else {
2338 if (!self->end_ns_event_obj)
2339 return;
2340 action = self->end_ns_event_obj;
2341 Py_INCREF(action);
2342 parcel = Py_None;
2343 Py_INCREF(parcel);
2344 }
2345
2346 res = PyTuple_New(2);
2347
2348 if (res) {
2349 PyTuple_SET_ITEM(res, 0, action);
2350 PyTuple_SET_ITEM(res, 1, parcel);
2351 PyList_Append(self->events, res);
2352 Py_DECREF(res);
2353 } else
2354 PyErr_Clear(); /* FIXME: propagate error */
2355}
2356
2357/* -------------------------------------------------------------------- */
2358/* methods (in alphabetical order) */
2359
2360static PyObject*
2361treebuilder_data(TreeBuilderObject* self, PyObject* args)
2362{
2363 PyObject* data;
2364 if (!PyArg_ParseTuple(args, "O:data", &data))
2365 return NULL;
2366
2367 return treebuilder_handle_data(self, data);
2368}
2369
2370static PyObject*
2371treebuilder_end(TreeBuilderObject* self, PyObject* args)
2372{
2373 PyObject* tag;
2374 if (!PyArg_ParseTuple(args, "O:end", &tag))
2375 return NULL;
2376
2377 return treebuilder_handle_end(self, tag);
2378}
2379
2380LOCAL(PyObject*)
2381treebuilder_done(TreeBuilderObject* self)
2382{
2383 PyObject* res;
2384
2385 /* FIXME: check stack size? */
2386
2387 if (self->root)
2388 res = self->root;
2389 else
2390 res = Py_None;
2391
2392 Py_INCREF(res);
2393 return res;
2394}
2395
2396static PyObject*
2397treebuilder_close(TreeBuilderObject* self, PyObject* args)
2398{
2399 if (!PyArg_ParseTuple(args, ":close"))
2400 return NULL;
2401
2402 return treebuilder_done(self);
2403}
2404
2405static PyObject*
2406treebuilder_start(TreeBuilderObject* self, PyObject* args)
2407{
2408 PyObject* tag;
2409 PyObject* attrib = Py_None;
2410 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2411 return NULL;
2412
2413 return treebuilder_handle_start(self, tag, attrib);
2414}
2415
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416static PyMethodDef treebuilder_methods[] = {
2417 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2418 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2419 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2421 {NULL, NULL}
2422};
2423
Neal Norwitz227b5332006-03-22 09:28:35 +00002424static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002425 PyVarObject_HEAD_INIT(NULL, 0)
2426 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002428 (destructor)treebuilder_dealloc, /* tp_dealloc */
2429 0, /* tp_print */
2430 0, /* tp_getattr */
2431 0, /* tp_setattr */
2432 0, /* tp_reserved */
2433 0, /* tp_repr */
2434 0, /* tp_as_number */
2435 0, /* tp_as_sequence */
2436 0, /* tp_as_mapping */
2437 0, /* tp_hash */
2438 0, /* tp_call */
2439 0, /* tp_str */
2440 0, /* tp_getattro */
2441 0, /* tp_setattro */
2442 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002443 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2444 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002445 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002446 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2447 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002448 0, /* tp_richcompare */
2449 0, /* tp_weaklistoffset */
2450 0, /* tp_iter */
2451 0, /* tp_iternext */
2452 treebuilder_methods, /* tp_methods */
2453 0, /* tp_members */
2454 0, /* tp_getset */
2455 0, /* tp_base */
2456 0, /* tp_dict */
2457 0, /* tp_descr_get */
2458 0, /* tp_descr_set */
2459 0, /* tp_dictoffset */
2460 (initproc)treebuilder_init, /* tp_init */
2461 PyType_GenericAlloc, /* tp_alloc */
2462 treebuilder_new, /* tp_new */
2463 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464};
2465
2466/* ==================================================================== */
2467/* the expat interface */
2468
2469#if defined(USE_EXPAT)
2470
2471#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002473static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475
Eli Bendersky52467b12012-06-01 07:13:08 +03002476static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2477 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2478
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479typedef struct {
2480 PyObject_HEAD
2481
2482 XML_Parser parser;
2483
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002484 PyObject *target;
2485 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002487 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002489 PyObject *handle_start;
2490 PyObject *handle_data;
2491 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002493 PyObject *handle_comment;
2494 PyObject *handle_pi;
2495 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002497 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002498
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499} XMLParserObject;
2500
Neal Norwitz227b5332006-03-22 09:28:35 +00002501static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002503#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2504
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505/* helpers */
2506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507LOCAL(PyObject*)
2508makeuniversal(XMLParserObject* self, const char* string)
2509{
2510 /* convert a UTF-8 tag/attribute name from the expat parser
2511 to a universal name string */
2512
2513 int size = strlen(string);
2514 PyObject* key;
2515 PyObject* value;
2516
2517 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002518 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 if (!key)
2520 return NULL;
2521
2522 value = PyDict_GetItem(self->names, key);
2523
2524 if (value) {
2525 Py_INCREF(value);
2526 } else {
2527 /* new name. convert to universal name, and decode as
2528 necessary */
2529
2530 PyObject* tag;
2531 char* p;
2532 int i;
2533
2534 /* look for namespace separator */
2535 for (i = 0; i < size; i++)
2536 if (string[i] == '}')
2537 break;
2538 if (i != size) {
2539 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002540 tag = PyBytes_FromStringAndSize(NULL, size+1);
2541 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542 p[0] = '{';
2543 memcpy(p+1, string, size);
2544 size++;
2545 } else {
2546 /* plain name; use key as tag */
2547 Py_INCREF(key);
2548 tag = key;
2549 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002550
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002552 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002553 value = PyUnicode_DecodeUTF8(p, size, "strict");
2554 Py_DECREF(tag);
2555 if (!value) {
2556 Py_DECREF(key);
2557 return NULL;
2558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559
2560 /* add to names dictionary */
2561 if (PyDict_SetItem(self->names, key, value) < 0) {
2562 Py_DECREF(key);
2563 Py_DECREF(value);
2564 return NULL;
2565 }
2566 }
2567
2568 Py_DECREF(key);
2569 return value;
2570}
2571
Eli Bendersky5b77d812012-03-16 08:20:05 +02002572/* Set the ParseError exception with the given parameters.
2573 * If message is not NULL, it's used as the error string. Otherwise, the
2574 * message string is the default for the given error_code.
2575*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002576static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002577expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002578{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002579 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002580
Victor Stinner499dfcf2011-03-21 13:26:24 +01002581 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002582 message ? message : EXPAT(ErrorString)(error_code),
2583 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002584 if (errmsg == NULL)
2585 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002586
Victor Stinner499dfcf2011-03-21 13:26:24 +01002587 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2588 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002589 if (!error)
2590 return;
2591
Eli Bendersky5b77d812012-03-16 08:20:05 +02002592 /* Add code and position attributes */
2593 code = PyLong_FromLong((long)error_code);
2594 if (!code) {
2595 Py_DECREF(error);
2596 return;
2597 }
2598 if (PyObject_SetAttrString(error, "code", code) == -1) {
2599 Py_DECREF(error);
2600 Py_DECREF(code);
2601 return;
2602 }
2603 Py_DECREF(code);
2604
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002605 position = Py_BuildValue("(ii)", line, column);
2606 if (!position) {
2607 Py_DECREF(error);
2608 return;
2609 }
2610 if (PyObject_SetAttrString(error, "position", position) == -1) {
2611 Py_DECREF(error);
2612 Py_DECREF(position);
2613 return;
2614 }
2615 Py_DECREF(position);
2616
2617 PyErr_SetObject(elementtree_parseerror_obj, error);
2618 Py_DECREF(error);
2619}
2620
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621/* -------------------------------------------------------------------- */
2622/* handlers */
2623
2624static void
2625expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2626 int data_len)
2627{
2628 PyObject* key;
2629 PyObject* value;
2630 PyObject* res;
2631
2632 if (data_len < 2 || data_in[0] != '&')
2633 return;
2634
Neal Norwitz0269b912007-08-08 06:56:02 +00002635 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 if (!key)
2637 return;
2638
2639 value = PyDict_GetItem(self->entity, key);
2640
2641 if (value) {
2642 if (TreeBuilder_CheckExact(self->target))
2643 res = treebuilder_handle_data(
2644 (TreeBuilderObject*) self->target, value
2645 );
2646 else if (self->handle_data)
2647 res = PyObject_CallFunction(self->handle_data, "O", value);
2648 else
2649 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002651 } else if (!PyErr_Occurred()) {
2652 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002653 char message[128] = "undefined entity ";
2654 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002655 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002656 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002658 EXPAT(GetErrorColumnNumber)(self->parser),
2659 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002660 );
2661 }
2662
2663 Py_DECREF(key);
2664}
2665
2666static void
2667expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2668 const XML_Char **attrib_in)
2669{
2670 PyObject* res;
2671 PyObject* tag;
2672 PyObject* attrib;
2673 int ok;
2674
2675 /* tag name */
2676 tag = makeuniversal(self, tag_in);
2677 if (!tag)
2678 return; /* parser will look for errors */
2679
2680 /* attributes */
2681 if (attrib_in[0]) {
2682 attrib = PyDict_New();
2683 if (!attrib)
2684 return;
2685 while (attrib_in[0] && attrib_in[1]) {
2686 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002687 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 if (!key || !value) {
2689 Py_XDECREF(value);
2690 Py_XDECREF(key);
2691 Py_DECREF(attrib);
2692 return;
2693 }
2694 ok = PyDict_SetItem(attrib, key, value);
2695 Py_DECREF(value);
2696 Py_DECREF(key);
2697 if (ok < 0) {
2698 Py_DECREF(attrib);
2699 return;
2700 }
2701 attrib_in += 2;
2702 }
2703 } else {
2704 Py_INCREF(Py_None);
2705 attrib = Py_None;
2706 }
2707
Eli Bendersky48d358b2012-05-30 17:57:50 +03002708 /* If we get None, pass an empty dictionary on */
2709 if (attrib == Py_None) {
2710 Py_DECREF(attrib);
2711 attrib = PyDict_New();
2712 if (!attrib)
2713 return;
2714 }
2715
2716 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 /* shortcut */
2718 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2719 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002720 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002721 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002723 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724 res = NULL;
2725
2726 Py_DECREF(tag);
2727 Py_DECREF(attrib);
2728
2729 Py_XDECREF(res);
2730}
2731
2732static void
2733expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2734 int data_len)
2735{
2736 PyObject* data;
2737 PyObject* res;
2738
Neal Norwitz0269b912007-08-08 06:56:02 +00002739 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002740 if (!data)
2741 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
2743 if (TreeBuilder_CheckExact(self->target))
2744 /* shortcut */
2745 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2746 else if (self->handle_data)
2747 res = PyObject_CallFunction(self->handle_data, "O", data);
2748 else
2749 res = NULL;
2750
2751 Py_DECREF(data);
2752
2753 Py_XDECREF(res);
2754}
2755
2756static void
2757expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2758{
2759 PyObject* tag;
2760 PyObject* res = NULL;
2761
2762 if (TreeBuilder_CheckExact(self->target))
2763 /* shortcut */
2764 /* the standard tree builder doesn't look at the end tag */
2765 res = treebuilder_handle_end(
2766 (TreeBuilderObject*) self->target, Py_None
2767 );
2768 else if (self->handle_end) {
2769 tag = makeuniversal(self, tag_in);
2770 if (tag) {
2771 res = PyObject_CallFunction(self->handle_end, "O", tag);
2772 Py_DECREF(tag);
2773 }
2774 }
2775
2776 Py_XDECREF(res);
2777}
2778
2779static void
2780expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2781 const XML_Char *uri)
2782{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002783 PyObject* sprefix = NULL;
2784 PyObject* suri = NULL;
2785
2786 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2787 if (!suri)
2788 return;
2789
2790 if (prefix)
2791 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2792 else
2793 sprefix = PyUnicode_FromString("");
2794 if (!sprefix) {
2795 Py_DECREF(suri);
2796 return;
2797 }
2798
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002800 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002802
2803 Py_DECREF(sprefix);
2804 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805}
2806
2807static void
2808expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2809{
2810 treebuilder_handle_namespace(
2811 (TreeBuilderObject*) self->target, 0, NULL, NULL
2812 );
2813}
2814
2815static void
2816expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2817{
2818 PyObject* comment;
2819 PyObject* res;
2820
2821 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002822 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 if (comment) {
2824 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2825 Py_XDECREF(res);
2826 Py_DECREF(comment);
2827 }
2828 }
2829}
2830
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002831static void
2832expat_start_doctype_handler(XMLParserObject *self,
2833 const XML_Char *doctype_name,
2834 const XML_Char *sysid,
2835 const XML_Char *pubid,
2836 int has_internal_subset)
2837{
2838 PyObject *self_pyobj = (PyObject *)self;
2839 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2840 PyObject *parser_doctype = NULL;
2841 PyObject *res = NULL;
2842
2843 doctype_name_obj = makeuniversal(self, doctype_name);
2844 if (!doctype_name_obj)
2845 return;
2846
2847 if (sysid) {
2848 sysid_obj = makeuniversal(self, sysid);
2849 if (!sysid_obj) {
2850 Py_DECREF(doctype_name_obj);
2851 return;
2852 }
2853 } else {
2854 Py_INCREF(Py_None);
2855 sysid_obj = Py_None;
2856 }
2857
2858 if (pubid) {
2859 pubid_obj = makeuniversal(self, pubid);
2860 if (!pubid_obj) {
2861 Py_DECREF(doctype_name_obj);
2862 Py_DECREF(sysid_obj);
2863 return;
2864 }
2865 } else {
2866 Py_INCREF(Py_None);
2867 pubid_obj = Py_None;
2868 }
2869
2870 /* If the target has a handler for doctype, call it. */
2871 if (self->handle_doctype) {
2872 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2873 doctype_name_obj, pubid_obj, sysid_obj);
2874 Py_CLEAR(res);
2875 }
2876
2877 /* Now see if the parser itself has a doctype method. If yes and it's
2878 * a subclass, call it but warn about deprecation. If it's not a subclass
2879 * (i.e. vanilla XMLParser), do nothing.
2880 */
2881 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2882 if (parser_doctype) {
2883 if (!XMLParser_CheckExact(self_pyobj)) {
2884 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2885 "This method of XMLParser is deprecated. Define"
2886 " doctype() method on the TreeBuilder target.",
2887 1) < 0) {
2888 goto clear;
2889 }
2890 res = PyObject_CallFunction(parser_doctype, "OOO",
2891 doctype_name_obj, pubid_obj, sysid_obj);
2892 Py_CLEAR(res);
2893 }
2894 }
2895
2896clear:
2897 Py_XDECREF(parser_doctype);
2898 Py_DECREF(doctype_name_obj);
2899 Py_DECREF(pubid_obj);
2900 Py_DECREF(sysid_obj);
2901}
2902
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903static void
2904expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2905 const XML_Char* data_in)
2906{
2907 PyObject* target;
2908 PyObject* data;
2909 PyObject* res;
2910
2911 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002912 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2913 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 if (target && data) {
2915 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2916 Py_XDECREF(res);
2917 Py_DECREF(data);
2918 Py_DECREF(target);
2919 } else {
2920 Py_XDECREF(data);
2921 Py_XDECREF(target);
2922 }
2923 }
2924}
2925
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002926static int
2927expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2928 XML_Encoding *info)
2929{
2930 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 unsigned char s[256];
2932 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002933 void *data;
2934 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935
2936 memset(info, 0, sizeof(XML_Encoding));
2937
2938 for (i = 0; i < 256; i++)
2939 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002940
Fredrik Lundhc3389992005-12-25 11:40:19 +00002941 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942 if (!u)
2943 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002944 if (PyUnicode_READY(u))
2945 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002947 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 Py_DECREF(u);
2949 return XML_STATUS_ERROR;
2950 }
2951
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002952 kind = PyUnicode_KIND(u);
2953 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002955 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2956 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2957 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002959 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 }
2961
2962 Py_DECREF(u);
2963
2964 return XML_STATUS_OK;
2965}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966
2967/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968
Eli Bendersky52467b12012-06-01 07:13:08 +03002969static PyObject *
2970xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971{
Eli Bendersky52467b12012-06-01 07:13:08 +03002972 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2973 if (self) {
2974 self->parser = NULL;
2975 self->target = self->entity = self->names = NULL;
2976 self->handle_start = self->handle_data = self->handle_end = NULL;
2977 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002978 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002980 return (PyObject *)self;
2981}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982
Eli Bendersky52467b12012-06-01 07:13:08 +03002983static int
2984xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2985{
2986 XMLParserObject *self_xp = (XMLParserObject *)self;
2987 PyObject *target = NULL, *html = NULL;
2988 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03002989 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990
Eli Bendersky52467b12012-06-01 07:13:08 +03002991 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2992 &html, &target, &encoding)) {
2993 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002995
Eli Bendersky52467b12012-06-01 07:13:08 +03002996 self_xp->entity = PyDict_New();
2997 if (!self_xp->entity)
2998 return -1;
2999
3000 self_xp->names = PyDict_New();
3001 if (!self_xp->names) {
3002 Py_XDECREF(self_xp->entity);
3003 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 }
3005
Eli Bendersky52467b12012-06-01 07:13:08 +03003006 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3007 if (!self_xp->parser) {
3008 Py_XDECREF(self_xp->entity);
3009 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003011 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 }
3013
Eli Bendersky52467b12012-06-01 07:13:08 +03003014 if (target) {
3015 Py_INCREF(target);
3016 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003017 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03003019 Py_XDECREF(self_xp->entity);
3020 Py_XDECREF(self_xp->names);
3021 EXPAT(ParserFree)(self_xp->parser);
3022 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003024 }
3025 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026
Eli Bendersky52467b12012-06-01 07:13:08 +03003027 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3028 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3029 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3030 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3031 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3032 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003033 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034
3035 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003036
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003038 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003040 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 (XML_StartElementHandler) expat_start_handler,
3042 (XML_EndElementHandler) expat_end_handler
3043 );
3044 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003045 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003046 (XML_DefaultHandler) expat_default_handler
3047 );
3048 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003049 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 (XML_CharacterDataHandler) expat_data_handler
3051 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003052 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003054 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055 (XML_CommentHandler) expat_comment_handler
3056 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003057 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003059 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 (XML_ProcessingInstructionHandler) expat_pi_handler
3061 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003062 EXPAT(SetStartDoctypeDeclHandler)(
3063 self_xp->parser,
3064 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3065 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003067 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3069 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070
Eli Bendersky52467b12012-06-01 07:13:08 +03003071 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072}
3073
Eli Bendersky52467b12012-06-01 07:13:08 +03003074static int
3075xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3076{
3077 Py_VISIT(self->handle_close);
3078 Py_VISIT(self->handle_pi);
3079 Py_VISIT(self->handle_comment);
3080 Py_VISIT(self->handle_end);
3081 Py_VISIT(self->handle_data);
3082 Py_VISIT(self->handle_start);
3083
3084 Py_VISIT(self->target);
3085 Py_VISIT(self->entity);
3086 Py_VISIT(self->names);
3087
3088 return 0;
3089}
3090
3091static int
3092xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093{
3094 EXPAT(ParserFree)(self->parser);
3095
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003096 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097 Py_XDECREF(self->handle_pi);
3098 Py_XDECREF(self->handle_comment);
3099 Py_XDECREF(self->handle_end);
3100 Py_XDECREF(self->handle_data);
3101 Py_XDECREF(self->handle_start);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003102 Py_XDECREF(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103
Eli Bendersky52467b12012-06-01 07:13:08 +03003104 Py_XDECREF(self->target);
3105 Py_XDECREF(self->entity);
3106 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107
Eli Bendersky52467b12012-06-01 07:13:08 +03003108 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109}
3110
Eli Bendersky52467b12012-06-01 07:13:08 +03003111static void
3112xmlparser_dealloc(XMLParserObject* self)
3113{
3114 PyObject_GC_UnTrack(self);
3115 xmlparser_gc_clear(self);
3116 Py_TYPE(self)->tp_free((PyObject *)self);
3117}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003118
3119LOCAL(PyObject*)
3120expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3121{
3122 int ok;
3123
3124 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3125
3126 if (PyErr_Occurred())
3127 return NULL;
3128
3129 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003130 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003131 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003133 EXPAT(GetErrorColumnNumber)(self->parser),
3134 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 );
3136 return NULL;
3137 }
3138
3139 Py_RETURN_NONE;
3140}
3141
3142static PyObject*
3143xmlparser_close(XMLParserObject* self, PyObject* args)
3144{
3145 /* end feeding data to parser */
3146
3147 PyObject* res;
3148 if (!PyArg_ParseTuple(args, ":close"))
3149 return NULL;
3150
3151 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003152 if (!res)
3153 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003154
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003155 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156 Py_DECREF(res);
3157 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003158 } if (self->handle_close) {
3159 Py_DECREF(res);
3160 return PyObject_CallFunction(self->handle_close, "");
3161 } else
3162 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163}
3164
3165static PyObject*
3166xmlparser_feed(XMLParserObject* self, PyObject* args)
3167{
3168 /* feed data to parser */
3169
3170 char* data;
3171 int data_len;
3172 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3173 return NULL;
3174
3175 return expat_parse(self, data, data_len, 0);
3176}
3177
3178static PyObject*
3179xmlparser_parse(XMLParserObject* self, PyObject* args)
3180{
3181 /* (internal) parse until end of input stream */
3182
3183 PyObject* reader;
3184 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003185 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186 PyObject* res;
3187
3188 PyObject* fileobj;
3189 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3190 return NULL;
3191
3192 reader = PyObject_GetAttrString(fileobj, "read");
3193 if (!reader)
3194 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196 /* read from open file object */
3197 for (;;) {
3198
3199 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3200
3201 if (!buffer) {
3202 /* read failed (e.g. due to KeyboardInterrupt) */
3203 Py_DECREF(reader);
3204 return NULL;
3205 }
3206
Eli Benderskyf996e772012-03-16 05:53:30 +02003207 if (PyUnicode_CheckExact(buffer)) {
3208 /* A unicode object is encoded into bytes using UTF-8 */
3209 if (PyUnicode_GET_SIZE(buffer) == 0) {
3210 Py_DECREF(buffer);
3211 break;
3212 }
3213 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3214 if (!temp) {
3215 /* Propagate exception from PyUnicode_AsEncodedString */
3216 Py_DECREF(buffer);
3217 Py_DECREF(reader);
3218 return NULL;
3219 }
3220
3221 /* Here we no longer need the original buffer since it contains
3222 * unicode. Make it point to the encoded bytes object.
3223 */
3224 Py_DECREF(buffer);
3225 buffer = temp;
3226 }
3227 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 Py_DECREF(buffer);
3229 break;
3230 }
3231
3232 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003233 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 );
3235
3236 Py_DECREF(buffer);
3237
3238 if (!res) {
3239 Py_DECREF(reader);
3240 return NULL;
3241 }
3242 Py_DECREF(res);
3243
3244 }
3245
3246 Py_DECREF(reader);
3247
3248 res = expat_parse(self, "", 0, 1);
3249
3250 if (res && TreeBuilder_CheckExact(self->target)) {
3251 Py_DECREF(res);
3252 return treebuilder_done((TreeBuilderObject*) self->target);
3253 }
3254
3255 return res;
3256}
3257
3258static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003259xmlparser_doctype(XMLParserObject *self, PyObject *args)
3260{
3261 Py_RETURN_NONE;
3262}
3263
3264static PyObject*
3265xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266{
3267 /* activate element event reporting */
3268
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003269 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 TreeBuilderObject* target;
3271
3272 PyObject* events; /* event collector */
3273 PyObject* event_set = Py_None;
3274 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3275 &event_set))
3276 return NULL;
3277
3278 if (!TreeBuilder_CheckExact(self->target)) {
3279 PyErr_SetString(
3280 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003281 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 "targets"
3283 );
3284 return NULL;
3285 }
3286
3287 target = (TreeBuilderObject*) self->target;
3288
3289 Py_INCREF(events);
3290 Py_XDECREF(target->events);
3291 target->events = events;
3292
3293 /* clear out existing events */
3294 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
3295 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
3296 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
3297 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
3298
3299 if (event_set == Py_None) {
3300 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003301 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 Py_RETURN_NONE;
3303 }
3304
3305 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3306 goto error;
3307
3308 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3309 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3310 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003311 if (PyUnicode_Check(item)) {
3312 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003313 if (event == NULL)
3314 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003315 } else if (PyBytes_Check(item))
3316 event = PyBytes_AS_STRING(item);
3317 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003319 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 if (strcmp(event, "start") == 0) {
3321 Py_INCREF(item);
3322 target->start_event_obj = item;
3323 } else if (strcmp(event, "end") == 0) {
3324 Py_INCREF(item);
3325 Py_XDECREF(target->end_event_obj);
3326 target->end_event_obj = item;
3327 } else if (strcmp(event, "start-ns") == 0) {
3328 Py_INCREF(item);
3329 Py_XDECREF(target->start_ns_event_obj);
3330 target->start_ns_event_obj = item;
3331 EXPAT(SetNamespaceDeclHandler)(
3332 self->parser,
3333 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3334 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3335 );
3336 } else if (strcmp(event, "end-ns") == 0) {
3337 Py_INCREF(item);
3338 Py_XDECREF(target->end_ns_event_obj);
3339 target->end_ns_event_obj = item;
3340 EXPAT(SetNamespaceDeclHandler)(
3341 self->parser,
3342 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3343 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3344 );
3345 } else {
3346 PyErr_Format(
3347 PyExc_ValueError,
3348 "unknown event '%s'", event
3349 );
3350 return NULL;
3351 }
3352 }
3353
3354 Py_RETURN_NONE;
3355
3356 error:
3357 PyErr_SetString(
3358 PyExc_TypeError,
3359 "invalid event tuple"
3360 );
3361 return NULL;
3362}
3363
3364static PyMethodDef xmlparser_methods[] = {
3365 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3366 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3367 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3368 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003369 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 {NULL, NULL}
3371};
3372
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003373static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003374xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003376 if (PyUnicode_Check(nameobj)) {
3377 PyObject* res;
3378 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3379 res = self->entity;
3380 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3381 res = self->target;
3382 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3383 return PyUnicode_FromFormat(
3384 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003386 }
3387 else
3388 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389
Alexander Belopolskye239d232010-12-08 23:31:48 +00003390 Py_INCREF(res);
3391 return res;
3392 }
3393 generic:
3394 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395}
3396
/* Type object for XMLParser.
 *
 * The initializer is positional, so the order of the entries below must
 * follow the slot order of PyTypeObject exactly.  The type is subclassable
 * (Py_TPFLAGS_BASETYPE) and participates in cyclic garbage collection
 * (Py_TPFLAGS_HAVE_GC, with the traverse/clear functions below).  Attribute
 * reads go through xmlparser_getattro.
 */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc,                  /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)xmlparser_getattro,               /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
    (inquiry)xmlparser_gc_clear,                    /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    xmlparser_methods,                              /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)xmlparser_init,                       /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    xmlparser_new,                                  /* tp_new */
    0,                                              /* tp_free */
};
3438
3439#endif
3440
3441/* ==================================================================== */
3442/* python module interface */
3443
3444static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446 {NULL, NULL}
3447};
3448
Martin v. Löwis1a214512008-06-11 05:26:20 +00003449
/* Module definition for _elementtree.  The initializer is positional, so
 * the entries must stay in struct PyModuleDef field order. */
static struct PyModuleDef _elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree",     /* module name */
    NULL,               /* no module docstring */
    -1,                 /* module state size */
    _functions,         /* module-level functions */
    /* the remaining optional slots are left unset */
    NULL,
    NULL,
    NULL,
    NULL
};
3461
Neal Norwitzf6657e62006-12-28 04:47:50 +00003462PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003463PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003465 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003467 /* Initialize object types */
3468 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003469 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003470 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003471 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003473 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003474 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475#endif
3476
Martin v. Löwis1a214512008-06-11 05:26:20 +00003477 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003478 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003479 return NULL;
3480
Eli Bendersky828efde2012-04-05 05:40:58 +03003481 if (!(temp = PyImport_ImportModule("copy")))
3482 return NULL;
3483 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3484 Py_XDECREF(temp);
3485
3486 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3487 return NULL;
3488
Eli Bendersky20d41742012-06-01 09:48:37 +03003489 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003490 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3491 if (expat_capi) {
3492 /* check that it's usable */
3493 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3494 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3495 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3496 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003497 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003498 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003499 }
3500 }
3501 if (!expat_capi) {
3502 PyErr_SetString(
3503 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3504 );
3505 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003506 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003508 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003509 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 );
3511 Py_INCREF(elementtree_parseerror_obj);
3512 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3513
Eli Bendersky092af1f2012-03-04 07:14:03 +02003514 Py_INCREF((PyObject *)&Element_Type);
3515 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3516
Eli Bendersky58d548d2012-05-29 15:45:16 +03003517 Py_INCREF((PyObject *)&TreeBuilder_Type);
3518 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3519
Eli Bendersky52467b12012-06-01 07:13:08 +03003520#if defined(USE_EXPAT)
3521 Py_INCREF((PyObject *)&XMLParser_Type);
3522 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3523#endif
3524
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526}