blob: cb840485809accc3f8f9d0b1c3df5ad0e87f1acd [file] [log] [blame]
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001/*
2 * ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003 * $Id: _elementtree.c 3473 2009-01-11 22:53:55Z fredrik $
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
5 * elementtree accelerator
6 *
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000010 * 2003-02-27 fl elementtree edition (alpha)
11 * 2004-06-03 fl updates for elementtree 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +000012 * 2005-01-05 fl major optimization effort
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000013 * 2005-01-11 fl first public release (cElementTree 0.8)
14 * 2005-01-12 fl split element object into base and extras
15 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
16 * 2005-01-17 fl added treebuilder close method
17 * 2005-01-17 fl fixed crash in getchildren
18 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
19 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
20 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
21 * 2005-01-28 fl added remove method (1.0.1)
22 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
23 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
24 * 2005-03-26 fl added Comment and PI support to XMLParser
25 * 2005-03-27 fl event optimizations; complain about bogus events
26 * 2005-08-08 fl fixed read error handling in parse
27 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
28 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
29 * 2005-12-16 fl added support for non-standard encodings
Fredrik Lundh44ed4db2006-03-12 21:06:35 +000030 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
31 * 2006-03-12 fl merge in 2.5 ssize_t changes
Florent Xiclunaf15351d2010-03-13 23:24:31 +000032 * 2007-08-25 fl call custom builder's close method from XMLParser
33 * 2007-08-31 fl added iter, extend from ET 1.3
34 * 2007-09-01 fl fixed ParseError exception, setslice source type, etc
35 * 2007-09-03 fl fixed handling of negative insert indexes
36 * 2007-09-04 fl added itertext from ET 1.3
37 * 2007-09-06 fl added position attribute to ParseError exception
38 * 2008-06-06 fl delay error reporting in iterparse (from Hrvoje Niksic)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000039 *
Florent Xiclunaf15351d2010-03-13 23:24:31 +000040 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
41 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000042 *
43 * info@pythonware.com
44 * http://www.pythonware.com
45 */
46
Fredrik Lundh6d52b552005-12-16 22:06:43 +000047/* Licensed to PSF under a Contributor Agreement. */
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048/* See http://www.python.org/psf/license for licensing details. */
Fredrik Lundh6d52b552005-12-16 22:06:43 +000049
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000050#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030051#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000052
Thomas Wouters00ee7ba2006-08-21 19:07:27 +000053#define VERSION "1.0.6"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000054
55/* -------------------------------------------------------------------- */
56/* configuration */
57
58/* Leave defined to include the expat-based XMLParser type */
59#define USE_EXPAT
60
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061/* An element can hold this many children without extra memory
62 allocations. */
63#define STATIC_CHILDREN 4
64
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
69
70/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010071 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000072 that the number of children should be an even number, at least on
73 32-bit platforms. */
74
75/* -------------------------------------------------------------------- */
76
/* allocation tracing hooks. change the condition below to 1 to print a
   running byte count for every ALLOC/RELEASE; otherwise both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   'type'; under MSVC it additionally requests inlining and the fastcall
   calling convention. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
94
/* tagged-pointer helpers. the lowest bit of a text/tail pointer carries
   the 'join' flag, so every use of text or tail as an object pointer
   must go through JOIN_OBJ. see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit stripped
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p with 'flag' or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102
103/* glue functions (see the init function for details) */
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000104static PyObject* elementtree_parseerror_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000105static PyObject* elementtree_deepcopy_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000106static PyObject* elementpath_obj;
107
108/* helpers */
109
110LOCAL(PyObject*)
111deepcopy(PyObject* object, PyObject* memo)
112{
113 /* do a deep copy of the given object */
114
115 PyObject* args;
116 PyObject* result;
117
118 if (!elementtree_deepcopy_obj) {
119 PyErr_SetString(
120 PyExc_RuntimeError,
121 "deepcopy helper not found"
122 );
123 return NULL;
124 }
125
126 args = PyTuple_New(2);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000127 if (!args)
128 return NULL;
129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000130 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
131 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
132
133 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
134
135 Py_DECREF(args);
136
137 return result;
138}
139
140LOCAL(PyObject*)
141list_join(PyObject* list)
142{
143 /* join list elements (destroying the list in the process) */
144
145 PyObject* joiner;
146 PyObject* function;
147 PyObject* args;
148 PyObject* result;
149
150 switch (PyList_GET_SIZE(list)) {
151 case 0:
152 Py_DECREF(list);
Christian Heimes72b710a2008-05-26 13:28:38 +0000153 return PyBytes_FromString("");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 case 1:
155 result = PyList_GET_ITEM(list, 0);
156 Py_INCREF(result);
157 Py_DECREF(list);
158 return result;
159 }
160
161 /* two or more elements: slice out a suitable separator from the
162 first member, and use that to join the entire list */
163
164 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
165 if (!joiner)
166 return NULL;
167
168 function = PyObject_GetAttrString(joiner, "join");
169 if (!function) {
170 Py_DECREF(joiner);
171 return NULL;
172 }
173
174 args = PyTuple_New(1);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000175 if (!args)
176 return NULL;
177
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000178 PyTuple_SET_ITEM(args, 0, list);
179
180 result = PyObject_CallObject(function, args);
181
182 Py_DECREF(args); /* also removes list */
183 Py_DECREF(function);
184 Py_DECREF(joiner);
185
186 return result;
187}
188
Eli Bendersky48d358b2012-05-30 17:57:50 +0300189/* Is the given object an empty dictionary?
190*/
191static int
192is_empty_dict(PyObject *obj)
193{
194 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
195}
196
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200199/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201typedef struct {
202
203 /* attributes (a dictionary object), or None if no attributes */
204 PyObject* attrib;
205
206 /* child elements */
207 int length; /* actual number of items */
208 int allocated; /* allocated items */
209
210 /* this either points to _children or to a malloced buffer */
211 PyObject* *children;
212
213 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215} ElementObjectExtra;
216
217typedef struct {
218 PyObject_HEAD
219
220 /* element tag (a string). */
221 PyObject* tag;
222
223 /* text before first child. note that this is a tagged pointer;
224 use JOIN_OBJ to get the object pointer. the join flag is used
225 to distinguish lists created by the tree builder from lists
226 assigned to the attribute by application code; the former
227 should be joined before being returned to the user, the latter
228 should be left intact. */
229 PyObject* text;
230
231 /* text after this element, in parent. note that this is a tagged
232 pointer; use JOIN_OBJ to get the object pointer. */
233 PyObject* tail;
234
235 ElementObjectExtra* extra;
236
Eli Benderskyebf37a22012-04-03 22:02:37 +0300237 PyObject *weakreflist; /* For tp_weaklistoffset */
238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000239} ElementObject;
240
Neal Norwitz227b5332006-03-22 09:28:35 +0000241static PyTypeObject Element_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000242
Christian Heimes90aa7642007-12-19 02:45:37 +0000243#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
245/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200246/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
248LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200249create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000250{
251 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
252 if (!self->extra)
253 return -1;
254
255 if (!attrib)
256 attrib = Py_None;
257
258 Py_INCREF(attrib);
259 self->extra->attrib = attrib;
260
261 self->extra->length = 0;
262 self->extra->allocated = STATIC_CHILDREN;
263 self->extra->children = self->extra->_children;
264
265 return 0;
266}
267
268LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200269dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270{
Eli Bendersky08b85292012-04-04 15:55:07 +0300271 ElementObjectExtra *myextra;
272 int i;
273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 if (!self->extra)
275 return;
276
277 /* Avoid DECREFs calling into this code again (cycles, etc.)
278 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300279 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 self->extra = NULL;
281
282 Py_DECREF(myextra->attrib);
283
Eli Benderskyebf37a22012-04-03 22:02:37 +0300284 for (i = 0; i < myextra->length; i++)
285 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286
Eli Benderskyebf37a22012-04-03 22:02:37 +0300287 if (myextra->children != myextra->_children)
288 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289
Eli Benderskyebf37a22012-04-03 22:02:37 +0300290 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000291}
292
Eli Bendersky092af1f2012-03-04 07:14:03 +0200293/* Convenience internal function to create new Element objects with the given
294 * tag and attributes.
295*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000296LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000298{
299 ElementObject* self;
300
Eli Bendersky0192ba32012-03-30 16:38:33 +0300301 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000302 if (self == NULL)
303 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 self->extra = NULL;
305
Eli Bendersky48d358b2012-05-30 17:57:50 +0300306 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307 if (create_extra(self, attrib) < 0) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000308 PyObject_Del(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000310 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 }
312
313 Py_INCREF(tag);
314 self->tag = tag;
315
316 Py_INCREF(Py_None);
317 self->text = Py_None;
318
319 Py_INCREF(Py_None);
320 self->tail = Py_None;
321
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 self->weakreflist = NULL;
323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000324 ALLOC(sizeof(ElementObject), "create element");
Eli Bendersky0192ba32012-03-30 16:38:33 +0300325 PyObject_GC_Track(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000326 return (PyObject*) self;
327}
328
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329static PyObject *
330element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
331{
332 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
333 if (e != NULL) {
334 Py_INCREF(Py_None);
335 e->tag = Py_None;
336
337 Py_INCREF(Py_None);
338 e->text = Py_None;
339
340 Py_INCREF(Py_None);
341 e->tail = Py_None;
342
343 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300344 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200345 }
346 return (PyObject *)e;
347}
348
Eli Bendersky737b1732012-05-29 06:02:56 +0300349/* Helper function for extracting the attrib dictionary from a keywords dict.
350 * This is required by some constructors/functions in this module that can
351 * either accept attrib as a keyword argument or all attributes splashed
352 * directly into *kwds.
353 * If there is no 'attrib' keyword, return an empty dict.
354 */
355static PyObject*
356get_attrib_from_keywords(PyObject *kwds)
357{
358 PyObject *attrib_str = PyUnicode_FromString("attrib");
359 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
360
361 if (attrib) {
362 /* If attrib was found in kwds, copy its value and remove it from
363 * kwds
364 */
365 if (!PyDict_Check(attrib)) {
366 Py_DECREF(attrib_str);
367 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
368 Py_TYPE(attrib)->tp_name);
369 return NULL;
370 }
371 attrib = PyDict_Copy(attrib);
372 PyDict_DelItem(kwds, attrib_str);
373 } else {
374 attrib = PyDict_New();
375 }
376
377 Py_DECREF(attrib_str);
378
379 if (attrib)
380 PyDict_Update(attrib, kwds);
381 return attrib;
382}
383
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384static int
385element_init(PyObject *self, PyObject *args, PyObject *kwds)
386{
387 PyObject *tag;
388 PyObject *tmp;
389 PyObject *attrib = NULL;
390 ElementObject *self_elem;
391
392 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
393 return -1;
394
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (attrib) {
396 /* attrib passed as positional arg */
397 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 if (!attrib)
399 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (kwds) {
401 if (PyDict_Update(attrib, kwds) < 0) {
402 return -1;
403 }
404 }
405 } else if (kwds) {
406 /* have keywords args */
407 attrib = get_attrib_from_keywords(kwds);
408 if (!attrib)
409 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300411 /* no attrib arg, no kwds, so no attributes */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_INCREF(Py_None);
413 attrib = Py_None;
414 }
415
416 self_elem = (ElementObject *)self;
417
Eli Bendersky48d358b2012-05-30 17:57:50 +0300418 if (attrib != Py_None && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 if (create_extra(self_elem, attrib) < 0) {
420 PyObject_Del(self_elem);
421 return -1;
422 }
423 }
424
Eli Bendersky48d358b2012-05-30 17:57:50 +0300425 /* We own a reference to attrib here and it's no longer needed. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(attrib);
427
428 /* Replace the objects already pointed to by tag, text and tail. */
429 tmp = self_elem->tag;
430 self_elem->tag = tag;
431 Py_INCREF(tag);
432 Py_DECREF(tmp);
433
434 tmp = self_elem->text;
435 self_elem->text = Py_None;
436 Py_INCREF(Py_None);
437 Py_DECREF(JOIN_OBJ(tmp));
438
439 tmp = self_elem->tail;
440 self_elem->tail = Py_None;
441 Py_INCREF(Py_None);
442 Py_DECREF(JOIN_OBJ(tmp));
443
444 return 0;
445}
446
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447LOCAL(int)
448element_resize(ElementObject* self, int extra)
449{
450 int size;
451 PyObject* *children;
452
453 /* make sure self->children can hold the given number of extra
454 elements. set an exception and return -1 if allocation failed */
455
456 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200457 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000458
459 size = self->extra->length + extra;
460
461 if (size > self->extra->allocated) {
462 /* use Python 2.4's list growth strategy */
463 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100465 * which needs at least 4 bytes.
466 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000467 * be safe.
468 */
469 size = size ? size : 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000470 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100472 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000473 * false alarm always assume at least one child to be safe.
474 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000475 children = PyObject_Realloc(self->extra->children,
476 size * sizeof(PyObject*));
477 if (!children)
478 goto nomemory;
479 } else {
480 children = PyObject_Malloc(size * sizeof(PyObject*));
481 if (!children)
482 goto nomemory;
483 /* copy existing children from static area to malloc buffer */
484 memcpy(children, self->extra->children,
485 self->extra->length * sizeof(PyObject*));
486 }
487 self->extra->children = children;
488 self->extra->allocated = size;
489 }
490
491 return 0;
492
493 nomemory:
494 PyErr_NoMemory();
495 return -1;
496}
497
498LOCAL(int)
499element_add_subelement(ElementObject* self, PyObject* element)
500{
501 /* add a child element to a parent */
502
503 if (element_resize(self, 1) < 0)
504 return -1;
505
506 Py_INCREF(element);
507 self->extra->children[self->extra->length] = element;
508
509 self->extra->length++;
510
511 return 0;
512}
513
514LOCAL(PyObject*)
515element_get_attrib(ElementObject* self)
516{
517 /* return borrowed reference to attrib dictionary */
518 /* note: this function assumes that the extra section exists */
519
520 PyObject* res = self->extra->attrib;
521
522 if (res == Py_None) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000523 Py_DECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524 /* create missing dictionary */
525 res = PyDict_New();
526 if (!res)
527 return NULL;
528 self->extra->attrib = res;
529 }
530
531 return res;
532}
533
534LOCAL(PyObject*)
535element_get_text(ElementObject* self)
536{
537 /* return borrowed reference to text attribute */
538
539 PyObject* res = self->text;
540
541 if (JOIN_GET(res)) {
542 res = JOIN_OBJ(res);
543 if (PyList_CheckExact(res)) {
544 res = list_join(res);
545 if (!res)
546 return NULL;
547 self->text = res;
548 }
549 }
550
551 return res;
552}
553
554LOCAL(PyObject*)
555element_get_tail(ElementObject* self)
556{
557 /* return borrowed reference to text attribute */
558
559 PyObject* res = self->tail;
560
561 if (JOIN_GET(res)) {
562 res = JOIN_OBJ(res);
563 if (PyList_CheckExact(res)) {
564 res = list_join(res);
565 if (!res)
566 return NULL;
567 self->tail = res;
568 }
569 }
570
571 return res;
572}
573
574static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300575subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576{
577 PyObject* elem;
578
579 ElementObject* parent;
580 PyObject* tag;
581 PyObject* attrib = NULL;
582 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
583 &Element_Type, &parent, &tag,
584 &PyDict_Type, &attrib))
585 return NULL;
586
Eli Bendersky737b1732012-05-29 06:02:56 +0300587 if (attrib) {
588 /* attrib passed as positional arg */
589 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 if (!attrib)
591 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300592 if (kwds) {
593 if (PyDict_Update(attrib, kwds) < 0) {
594 return NULL;
595 }
596 }
597 } else if (kwds) {
598 /* have keyword args */
599 attrib = get_attrib_from_keywords(kwds);
600 if (!attrib)
601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 Py_INCREF(Py_None);
605 attrib = Py_None;
606 }
607
Eli Bendersky092af1f2012-03-04 07:14:03 +0200608 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609
610 Py_DECREF(attrib);
611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
628 int i;
629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300641
642 /* The following is like Py_CLEAR for self->text and self->tail, but
643 * written explicitily because the real pointers hide behind access
644 * macros.
645 */
646 if (self->text) {
647 PyObject *tmp = JOIN_OBJ(self->text);
648 self->text = NULL;
649 Py_DECREF(tmp);
650 }
651
652 if (self->tail) {
653 PyObject *tmp = JOIN_OBJ(self->tail);
654 self->tail = NULL;
655 Py_DECREF(tmp);
656 }
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657
658 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300659 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300660 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 return 0;
663}
664
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665static void
666element_dealloc(ElementObject* self)
667{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300669
670 if (self->weakreflist != NULL)
671 PyObject_ClearWeakRefs((PyObject *) self);
672
Eli Bendersky0192ba32012-03-30 16:38:33 +0300673 /* element_gc_clear clears all references and deallocates extra
674 */
675 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676
677 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200678 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679}
680
681/* -------------------------------------------------------------------- */
682/* methods (in alphabetical order) */
683
684static PyObject*
685element_append(ElementObject* self, PyObject* args)
686{
687 PyObject* element;
688 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
689 return NULL;
690
691 if (element_add_subelement(self, element) < 0)
692 return NULL;
693
694 Py_RETURN_NONE;
695}
696
697static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300698element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699{
700 if (!PyArg_ParseTuple(args, ":clear"))
701 return NULL;
702
Eli Benderskyebf37a22012-04-03 22:02:37 +0300703 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 Py_INCREF(Py_None);
706 Py_DECREF(JOIN_OBJ(self->text));
707 self->text = Py_None;
708
709 Py_INCREF(Py_None);
710 Py_DECREF(JOIN_OBJ(self->tail));
711 self->tail = Py_None;
712
713 Py_RETURN_NONE;
714}
715
716static PyObject*
717element_copy(ElementObject* self, PyObject* args)
718{
719 int i;
720 ElementObject* element;
721
722 if (!PyArg_ParseTuple(args, ":__copy__"))
723 return NULL;
724
Eli Bendersky092af1f2012-03-04 07:14:03 +0200725 element = (ElementObject*) create_new_element(
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726 self->tag, (self->extra) ? self->extra->attrib : Py_None
727 );
728 if (!element)
729 return NULL;
730
731 Py_DECREF(JOIN_OBJ(element->text));
732 element->text = self->text;
733 Py_INCREF(JOIN_OBJ(element->text));
734
735 Py_DECREF(JOIN_OBJ(element->tail));
736 element->tail = self->tail;
737 Py_INCREF(JOIN_OBJ(element->tail));
738
739 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100740
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000741 if (element_resize(element, self->extra->length) < 0) {
742 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
746 for (i = 0; i < self->extra->length; i++) {
747 Py_INCREF(self->extra->children[i]);
748 element->extra->children[i] = self->extra->children[i];
749 }
750
751 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 }
754
755 return (PyObject*) element;
756}
757
758static PyObject*
759element_deepcopy(ElementObject* self, PyObject* args)
760{
761 int i;
762 ElementObject* element;
763 PyObject* tag;
764 PyObject* attrib;
765 PyObject* text;
766 PyObject* tail;
767 PyObject* id;
768
769 PyObject* memo;
770 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
771 return NULL;
772
773 tag = deepcopy(self->tag, memo);
774 if (!tag)
775 return NULL;
776
777 if (self->extra) {
778 attrib = deepcopy(self->extra->attrib, memo);
779 if (!attrib) {
780 Py_DECREF(tag);
781 return NULL;
782 }
783 } else {
784 Py_INCREF(Py_None);
785 attrib = Py_None;
786 }
787
Eli Bendersky092af1f2012-03-04 07:14:03 +0200788 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 Py_DECREF(tag);
791 Py_DECREF(attrib);
792
793 if (!element)
794 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 text = deepcopy(JOIN_OBJ(self->text), memo);
797 if (!text)
798 goto error;
799 Py_DECREF(element->text);
800 element->text = JOIN_SET(text, JOIN_GET(self->text));
801
802 tail = deepcopy(JOIN_OBJ(self->tail), memo);
803 if (!tail)
804 goto error;
805 Py_DECREF(element->tail);
806 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
807
808 if (self->extra) {
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 if (element_resize(element, self->extra->length) < 0)
811 goto error;
812
813 for (i = 0; i < self->extra->length; i++) {
814 PyObject* child = deepcopy(self->extra->children[i], memo);
815 if (!child) {
816 element->extra->length = i;
817 goto error;
818 }
819 element->extra->children[i] = child;
820 }
821
822 element->extra->length = self->extra->length;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 }
825
826 /* add object to memo dictionary (so deepcopy won't visit it again) */
Christian Heimes217cfd12007-12-02 14:31:20 +0000827 id = PyLong_FromLong((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000828 if (!id)
829 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830
831 i = PyDict_SetItem(memo, id, (PyObject*) element);
832
833 Py_DECREF(id);
834
835 if (i < 0)
836 goto error;
837
838 return (PyObject*) element;
839
840 error:
841 Py_DECREF(element);
842 return NULL;
843}
844
845LOCAL(int)
846checkpath(PyObject* tag)
847{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000848 Py_ssize_t i;
849 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 /* check if a tag contains an xpath character */
852
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000853#define PATHCHAR(ch) \
854 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000855
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000856 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
858 void *data = PyUnicode_DATA(tag);
859 unsigned int kind = PyUnicode_KIND(tag);
860 for (i = 0; i < len; i++) {
861 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
862 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000863 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000865 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000867 return 1;
868 }
869 return 0;
870 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000871 if (PyBytes_Check(tag)) {
872 char *p = PyBytes_AS_STRING(tag);
873 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000874 if (p[i] == '{')
875 check = 0;
876 else if (p[i] == '}')
877 check = 1;
878 else if (check && PATHCHAR(p[i]))
879 return 1;
880 }
881 return 0;
882 }
883
884 return 1; /* unknown type; might be path expression */
885}
886
887static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000888element_extend(ElementObject* self, PyObject* args)
889{
890 PyObject* seq;
891 Py_ssize_t i, seqlen = 0;
892
893 PyObject* seq_in;
894 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
895 return NULL;
896
897 seq = PySequence_Fast(seq_in, "");
898 if (!seq) {
899 PyErr_Format(
900 PyExc_TypeError,
901 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
902 );
903 return NULL;
904 }
905
906 seqlen = PySequence_Size(seq);
907 for (i = 0; i < seqlen; i++) {
908 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +0200909 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
910 Py_DECREF(seq);
911 PyErr_Format(
912 PyExc_TypeError,
913 "expected an Element, not \"%.200s\"",
914 Py_TYPE(element)->tp_name);
915 return NULL;
916 }
917
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000918 if (element_add_subelement(self, element) < 0) {
919 Py_DECREF(seq);
920 return NULL;
921 }
922 }
923
924 Py_DECREF(seq);
925
926 Py_RETURN_NONE;
927}
928
929static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300930element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000931{
932 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000933 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000934 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +0300935 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200936
Eli Bendersky737b1732012-05-29 06:02:56 +0300937 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
938 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000939 return NULL;
940
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200941 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200942 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200943 return _PyObject_CallMethodId(
944 elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000945 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200946 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000947
948 if (!self->extra)
949 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100950
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000951 for (i = 0; i < self->extra->length; i++) {
952 PyObject* item = self->extra->children[i];
953 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +0000954 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000955 Py_INCREF(item);
956 return item;
957 }
958 }
959
960 Py_RETURN_NONE;
961}
962
963static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300964element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000965{
966 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000967 PyObject* tag;
968 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200970 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +0300971 static char *kwlist[] = {"path", "default", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200972
Eli Bendersky737b1732012-05-29 06:02:56 +0300973 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
974 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000975 return NULL;
976
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000977 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200978 return _PyObject_CallMethodId(
979 elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000980 );
981
982 if (!self->extra) {
983 Py_INCREF(default_value);
984 return default_value;
985 }
986
987 for (i = 0; i < self->extra->length; i++) {
988 ElementObject* item = (ElementObject*) self->extra->children[i];
Mark Dickinson211c6252009-02-01 10:28:51 +0000989 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
990
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000991 PyObject* text = element_get_text(item);
992 if (text == Py_None)
Christian Heimes72b710a2008-05-26 13:28:38 +0000993 return PyBytes_FromString("");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000994 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000995 return text;
996 }
997 }
998
999 Py_INCREF(default_value);
1000 return default_value;
1001}
1002
1003static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001004element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001005{
1006 int i;
1007 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001008 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001009 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001010 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001011
Eli Bendersky737b1732012-05-29 06:02:56 +03001012 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1013 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001014 return NULL;
1015
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001016 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001017 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001018 return _PyObject_CallMethodId(
1019 elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001020 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001021 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001022
1023 out = PyList_New(0);
1024 if (!out)
1025 return NULL;
1026
1027 if (!self->extra)
1028 return out;
1029
1030 for (i = 0; i < self->extra->length; i++) {
1031 PyObject* item = self->extra->children[i];
1032 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001033 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001034 if (PyList_Append(out, item) < 0) {
1035 Py_DECREF(out);
1036 return NULL;
1037 }
1038 }
1039 }
1040
1041 return out;
1042}
1043
1044static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001045element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001046{
1047 PyObject* tag;
1048 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001049 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001050 static char *kwlist[] = {"path", "namespaces", 0};
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001051
Eli Bendersky737b1732012-05-29 06:02:56 +03001052 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
1053 &tag, &namespaces))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054 return NULL;
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 return _PyObject_CallMethodId(
1057 elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001058 );
1059}
1060
1061static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062element_get(ElementObject* self, PyObject* args)
1063{
1064 PyObject* value;
1065
1066 PyObject* key;
1067 PyObject* default_value = Py_None;
1068 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
1069 return NULL;
1070
1071 if (!self->extra || self->extra->attrib == Py_None)
1072 value = default_value;
1073 else {
1074 value = PyDict_GetItem(self->extra->attrib, key);
1075 if (!value)
1076 value = default_value;
1077 }
1078
1079 Py_INCREF(value);
1080 return value;
1081}
1082
1083static PyObject*
1084element_getchildren(ElementObject* self, PyObject* args)
1085{
1086 int i;
1087 PyObject* list;
1088
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001089 /* FIXME: report as deprecated? */
1090
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 if (!PyArg_ParseTuple(args, ":getchildren"))
1092 return NULL;
1093
1094 if (!self->extra)
1095 return PyList_New(0);
1096
1097 list = PyList_New(self->extra->length);
1098 if (!list)
1099 return NULL;
1100
1101 for (i = 0; i < self->extra->length; i++) {
1102 PyObject* item = self->extra->children[i];
1103 Py_INCREF(item);
1104 PyList_SET_ITEM(list, i, item);
1105 }
1106
1107 return list;
1108}
1109
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001110
Eli Bendersky64d11e62012-06-15 07:42:50 +03001111static PyObject *
1112create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1113
1114
1115static PyObject *
1116element_iter(ElementObject *self, PyObject *args)
1117{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001118 PyObject* tag = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (!PyArg_ParseTuple(args, "|O:iter", &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001120 return NULL;
1121
Eli Bendersky64d11e62012-06-15 07:42:50 +03001122 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123}
1124
1125
1126static PyObject*
1127element_itertext(ElementObject* self, PyObject* args)
1128{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001129 if (!PyArg_ParseTuple(args, ":itertext"))
1130 return NULL;
1131
Eli Bendersky64d11e62012-06-15 07:42:50 +03001132 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133}
1134
Eli Bendersky64d11e62012-06-15 07:42:50 +03001135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001137element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001139 ElementObject* self = (ElementObject*) self_;
1140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 if (!self->extra || index < 0 || index >= self->extra->length) {
1142 PyErr_SetString(
1143 PyExc_IndexError,
1144 "child index out of range"
1145 );
1146 return NULL;
1147 }
1148
1149 Py_INCREF(self->extra->children[index]);
1150 return self->extra->children[index];
1151}
1152
1153static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154element_insert(ElementObject* self, PyObject* args)
1155{
1156 int i;
1157
1158 int index;
1159 PyObject* element;
1160 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1161 &Element_Type, &element))
1162 return NULL;
1163
1164 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001165 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001167 if (index < 0) {
1168 index += self->extra->length;
1169 if (index < 0)
1170 index = 0;
1171 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 if (index > self->extra->length)
1173 index = self->extra->length;
1174
1175 if (element_resize(self, 1) < 0)
1176 return NULL;
1177
1178 for (i = self->extra->length; i > index; i--)
1179 self->extra->children[i] = self->extra->children[i-1];
1180
1181 Py_INCREF(element);
1182 self->extra->children[index] = element;
1183
1184 self->extra->length++;
1185
1186 Py_RETURN_NONE;
1187}
1188
1189static PyObject*
1190element_items(ElementObject* self, PyObject* args)
1191{
1192 if (!PyArg_ParseTuple(args, ":items"))
1193 return NULL;
1194
1195 if (!self->extra || self->extra->attrib == Py_None)
1196 return PyList_New(0);
1197
1198 return PyDict_Items(self->extra->attrib);
1199}
1200
1201static PyObject*
1202element_keys(ElementObject* self, PyObject* args)
1203{
1204 if (!PyArg_ParseTuple(args, ":keys"))
1205 return NULL;
1206
1207 if (!self->extra || self->extra->attrib == Py_None)
1208 return PyList_New(0);
1209
1210 return PyDict_Keys(self->extra->attrib);
1211}
1212
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214element_length(ElementObject* self)
1215{
1216 if (!self->extra)
1217 return 0;
1218
1219 return self->extra->length;
1220}
1221
1222static PyObject*
1223element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1224{
1225 PyObject* elem;
1226
1227 PyObject* tag;
1228 PyObject* attrib;
1229 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1230 return NULL;
1231
1232 attrib = PyDict_Copy(attrib);
1233 if (!attrib)
1234 return NULL;
1235
Eli Bendersky092af1f2012-03-04 07:14:03 +02001236 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237
1238 Py_DECREF(attrib);
1239
1240 return elem;
1241}
1242
1243static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244element_remove(ElementObject* self, PyObject* args)
1245{
1246 int i;
1247
1248 PyObject* element;
1249 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1250 return NULL;
1251
1252 if (!self->extra) {
1253 /* element has no children, so raise exception */
1254 PyErr_SetString(
1255 PyExc_ValueError,
1256 "list.remove(x): x not in list"
1257 );
1258 return NULL;
1259 }
1260
1261 for (i = 0; i < self->extra->length; i++) {
1262 if (self->extra->children[i] == element)
1263 break;
Mark Dickinson211c6252009-02-01 10:28:51 +00001264 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 break;
1266 }
1267
1268 if (i == self->extra->length) {
1269 /* element is not in children, so raise exception */
1270 PyErr_SetString(
1271 PyExc_ValueError,
1272 "list.remove(x): x not in list"
1273 );
1274 return NULL;
1275 }
1276
1277 Py_DECREF(self->extra->children[i]);
1278
1279 self->extra->length--;
1280
1281 for (; i < self->extra->length; i++)
1282 self->extra->children[i] = self->extra->children[i+1];
1283
1284 Py_RETURN_NONE;
1285}
1286
1287static PyObject*
1288element_repr(ElementObject* self)
1289{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001290 if (self->tag)
1291 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1292 else
1293 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001294}
1295
1296static PyObject*
1297element_set(ElementObject* self, PyObject* args)
1298{
1299 PyObject* attrib;
1300
1301 PyObject* key;
1302 PyObject* value;
1303 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1304 return NULL;
1305
1306 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001307 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308
1309 attrib = element_get_attrib(self);
1310 if (!attrib)
1311 return NULL;
1312
1313 if (PyDict_SetItem(attrib, key, value) < 0)
1314 return NULL;
1315
1316 Py_RETURN_NONE;
1317}
1318
1319static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001320element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001322 ElementObject* self = (ElementObject*) self_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 int i;
1324 PyObject* old;
1325
1326 if (!self->extra || index < 0 || index >= self->extra->length) {
1327 PyErr_SetString(
1328 PyExc_IndexError,
1329 "child assignment index out of range");
1330 return -1;
1331 }
1332
1333 old = self->extra->children[index];
1334
1335 if (item) {
1336 Py_INCREF(item);
1337 self->extra->children[index] = item;
1338 } else {
1339 self->extra->length--;
1340 for (i = index; i < self->extra->length; i++)
1341 self->extra->children[i] = self->extra->children[i+1];
1342 }
1343
1344 Py_DECREF(old);
1345
1346 return 0;
1347}
1348
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001349static PyObject*
1350element_subscr(PyObject* self_, PyObject* item)
1351{
1352 ElementObject* self = (ElementObject*) self_;
1353
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001354 if (PyIndex_Check(item)) {
1355 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001356
1357 if (i == -1 && PyErr_Occurred()) {
1358 return NULL;
1359 }
1360 if (i < 0 && self->extra)
1361 i += self->extra->length;
1362 return element_getitem(self_, i);
1363 }
1364 else if (PySlice_Check(item)) {
1365 Py_ssize_t start, stop, step, slicelen, cur, i;
1366 PyObject* list;
1367
1368 if (!self->extra)
1369 return PyList_New(0);
1370
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001371 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001372 self->extra->length,
1373 &start, &stop, &step, &slicelen) < 0) {
1374 return NULL;
1375 }
1376
1377 if (slicelen <= 0)
1378 return PyList_New(0);
1379 else {
1380 list = PyList_New(slicelen);
1381 if (!list)
1382 return NULL;
1383
1384 for (cur = start, i = 0; i < slicelen;
1385 cur += step, i++) {
1386 PyObject* item = self->extra->children[cur];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392 }
1393 }
1394 else {
1395 PyErr_SetString(PyExc_TypeError,
1396 "element indices must be integers");
1397 return NULL;
1398 }
1399}
1400
1401static int
1402element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1403{
1404 ElementObject* self = (ElementObject*) self_;
1405
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001406 if (PyIndex_Check(item)) {
1407 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001408
1409 if (i == -1 && PyErr_Occurred()) {
1410 return -1;
1411 }
1412 if (i < 0 && self->extra)
1413 i += self->extra->length;
1414 return element_setitem(self_, i, value);
1415 }
1416 else if (PySlice_Check(item)) {
1417 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1418
1419 PyObject* recycle = NULL;
1420 PyObject* seq = NULL;
1421
1422 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001423 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001424
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001425 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001426 self->extra->length,
1427 &start, &stop, &step, &slicelen) < 0) {
1428 return -1;
1429 }
1430
Eli Bendersky865756a2012-03-09 13:38:15 +02001431 if (value == NULL) {
1432 /* Delete slice */
1433 size_t cur;
1434 Py_ssize_t i;
1435
1436 if (slicelen <= 0)
1437 return 0;
1438
1439 /* Since we're deleting, the direction of the range doesn't matter,
1440 * so for simplicity make it always ascending.
1441 */
1442 if (step < 0) {
1443 stop = start + 1;
1444 start = stop + step * (slicelen - 1) - 1;
1445 step = -step;
1446 }
1447
1448 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1449
1450 /* recycle is a list that will contain all the children
1451 * scheduled for removal.
1452 */
1453 if (!(recycle = PyList_New(slicelen))) {
1454 PyErr_NoMemory();
1455 return -1;
1456 }
1457
1458 /* This loop walks over all the children that have to be deleted,
1459 * with cur pointing at them. num_moved is the amount of children
1460 * until the next deleted child that have to be "shifted down" to
1461 * occupy the deleted's places.
1462 * Note that in the ith iteration, shifting is done i+i places down
1463 * because i children were already removed.
1464 */
1465 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1466 /* Compute how many children have to be moved, clipping at the
1467 * list end.
1468 */
1469 Py_ssize_t num_moved = step - 1;
1470 if (cur + step >= (size_t)self->extra->length) {
1471 num_moved = self->extra->length - cur - 1;
1472 }
1473
1474 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1475
1476 memmove(
1477 self->extra->children + cur - i,
1478 self->extra->children + cur + 1,
1479 num_moved * sizeof(PyObject *));
1480 }
1481
1482 /* Leftover "tail" after the last removed child */
1483 cur = start + (size_t)slicelen * step;
1484 if (cur < (size_t)self->extra->length) {
1485 memmove(
1486 self->extra->children + cur - slicelen,
1487 self->extra->children + cur,
1488 (self->extra->length - cur) * sizeof(PyObject *));
1489 }
1490
1491 self->extra->length -= slicelen;
1492
1493 /* Discard the recycle list with all the deleted sub-elements */
1494 Py_XDECREF(recycle);
1495 return 0;
1496 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001498 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001499 seq = PySequence_Fast(value, "");
1500 if (!seq) {
1501 PyErr_Format(
1502 PyExc_TypeError,
1503 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1504 );
1505 return -1;
1506 }
1507 newlen = PySequence_Size(seq);
1508 }
1509
1510 if (step != 1 && newlen != slicelen)
1511 {
1512 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001513 "attempt to assign sequence of size %zd "
1514 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001515 newlen, slicelen
1516 );
1517 return -1;
1518 }
1519
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001520 /* Resize before creating the recycle bin, to prevent refleaks. */
1521 if (newlen > slicelen) {
1522 if (element_resize(self, newlen - slicelen) < 0) {
1523 if (seq) {
1524 Py_DECREF(seq);
1525 }
1526 return -1;
1527 }
1528 }
1529
1530 if (slicelen > 0) {
1531 /* to avoid recursive calls to this method (via decref), move
1532 old items to the recycle bin here, and get rid of them when
1533 we're done modifying the element */
1534 recycle = PyList_New(slicelen);
1535 if (!recycle) {
1536 if (seq) {
1537 Py_DECREF(seq);
1538 }
1539 return -1;
1540 }
1541 for (cur = start, i = 0; i < slicelen;
1542 cur += step, i++)
1543 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1544 }
1545
1546 if (newlen < slicelen) {
1547 /* delete slice */
1548 for (i = stop; i < self->extra->length; i++)
1549 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1550 } else if (newlen > slicelen) {
1551 /* insert slice */
1552 for (i = self->extra->length-1; i >= stop; i--)
1553 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1554 }
1555
1556 /* replace the slice */
1557 for (cur = start, i = 0; i < newlen;
1558 cur += step, i++) {
1559 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1560 Py_INCREF(element);
1561 self->extra->children[cur] = element;
1562 }
1563
1564 self->extra->length += newlen - slicelen;
1565
1566 if (seq) {
1567 Py_DECREF(seq);
1568 }
1569
1570 /* discard the recycle bin, and everything in it */
1571 Py_XDECREF(recycle);
1572
1573 return 0;
1574 }
1575 else {
1576 PyErr_SetString(PyExc_TypeError,
1577 "element indices must be integers");
1578 return -1;
1579 }
1580}
1581
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582static PyMethodDef element_methods[] = {
1583
Eli Bendersky0192ba32012-03-30 16:38:33 +03001584 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585
1586 {"get", (PyCFunction) element_get, METH_VARARGS},
1587 {"set", (PyCFunction) element_set, METH_VARARGS},
1588
Eli Bendersky737b1732012-05-29 06:02:56 +03001589 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1590 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1591 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592
1593 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001594 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1596 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1597
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001598 {"iter", (PyCFunction) element_iter, METH_VARARGS},
1599 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001600 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001601
1602 {"getiterator", (PyCFunction) element_iter, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1604
1605 {"items", (PyCFunction) element_items, METH_VARARGS},
1606 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1607
1608 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1609
1610 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1611 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1612
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 {NULL, NULL}
1614};
1615
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001617element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618{
1619 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001620 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001621
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001622 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001623 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001624
Alexander Belopolskye239d232010-12-08 23:31:48 +00001625 if (name == NULL)
1626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001628 /* handle common attributes first */
1629 if (strcmp(name, "tag") == 0) {
1630 res = self->tag;
1631 Py_INCREF(res);
1632 return res;
1633 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634 res = element_get_text(self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001635 Py_INCREF(res);
1636 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 }
1638
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001639 /* methods */
1640 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1641 if (res)
1642 return res;
1643
1644 /* less common attributes */
1645 if (strcmp(name, "tail") == 0) {
1646 PyErr_Clear();
1647 res = element_get_tail(self);
1648 } else if (strcmp(name, "attrib") == 0) {
1649 PyErr_Clear();
1650 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001651 create_extra(self, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001652 res = element_get_attrib(self);
1653 }
1654
1655 if (!res)
1656 return NULL;
1657
1658 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001659 return res;
1660}
1661
Eli Benderskyb20df952012-05-20 06:33:29 +03001662static PyObject*
1663element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664{
Eli Benderskyb20df952012-05-20 06:33:29 +03001665 char *name = "";
1666 if (PyUnicode_Check(nameobj))
1667 name = _PyUnicode_AsString(nameobj);
1668
1669 if (name == NULL)
1670 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
1672 if (strcmp(name, "tag") == 0) {
1673 Py_DECREF(self->tag);
1674 self->tag = value;
1675 Py_INCREF(self->tag);
1676 } else if (strcmp(name, "text") == 0) {
1677 Py_DECREF(JOIN_OBJ(self->text));
1678 self->text = value;
1679 Py_INCREF(self->text);
1680 } else if (strcmp(name, "tail") == 0) {
1681 Py_DECREF(JOIN_OBJ(self->tail));
1682 self->tail = value;
1683 Py_INCREF(self->tail);
1684 } else if (strcmp(name, "attrib") == 0) {
1685 if (!self->extra)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001686 create_extra(self, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687 Py_DECREF(self->extra->attrib);
1688 self->extra->attrib = value;
1689 Py_INCREF(self->extra->attrib);
1690 } else {
1691 PyErr_SetString(PyExc_AttributeError, name);
Eli Benderskyb20df952012-05-20 06:33:29 +03001692 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 }
1694
Eli Benderskyb20df952012-05-20 06:33:29 +03001695 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001696}
1697
1698static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001699 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700 0, /* sq_concat */
1701 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001702 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001703 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001704 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 0,
1706};
1707
1708static PyMappingMethods element_as_mapping = {
1709 (lenfunc) element_length,
1710 (binaryfunc) element_subscr,
1711 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001712};
1713
Neal Norwitz227b5332006-03-22 09:28:35 +00001714static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001715 PyVarObject_HEAD_INIT(NULL, 0)
1716 "Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001718 (destructor)element_dealloc, /* tp_dealloc */
1719 0, /* tp_print */
1720 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001721 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001722 0, /* tp_reserved */
1723 (reprfunc)element_repr, /* tp_repr */
1724 0, /* tp_as_number */
1725 &element_as_sequence, /* tp_as_sequence */
1726 &element_as_mapping, /* tp_as_mapping */
1727 0, /* tp_hash */
1728 0, /* tp_call */
1729 0, /* tp_str */
1730 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001731 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001732 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001733 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1734 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001735 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001736 (traverseproc)element_gc_traverse, /* tp_traverse */
1737 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001738 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001739 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001740 0, /* tp_iter */
1741 0, /* tp_iternext */
1742 element_methods, /* tp_methods */
1743 0, /* tp_members */
1744 0, /* tp_getset */
1745 0, /* tp_base */
1746 0, /* tp_dict */
1747 0, /* tp_descr_get */
1748 0, /* tp_descr_set */
1749 0, /* tp_dictoffset */
1750 (initproc)element_init, /* tp_init */
1751 PyType_GenericAlloc, /* tp_alloc */
1752 element_new, /* tp_new */
1753 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754};
1755
Eli Bendersky64d11e62012-06-15 07:42:50 +03001756/******************************* Element iterator ****************************/
1757
1758/* ElementIterObject represents the iteration state over an XML element in
1759 * pre-order traversal. To keep track of which sub-element should be returned
1760 * next, a stack of parents is maintained. This is a standard stack-based
1761 * iterative pre-order traversal of a tree.
 * The stack is managed using a singly-linked list starting at parent_stack.
1763 * Each stack node contains the saved parent to which we should return after
1764 * the current one is exhausted, and the next child to examine in that parent.
1765 */
1766typedef struct ParentLocator_t {
1767 ElementObject *parent;
1768 Py_ssize_t child_index;
1769 struct ParentLocator_t *next;
1770} ParentLocator;
1771
1772typedef struct {
1773 PyObject_HEAD
1774 ParentLocator *parent_stack;
1775 ElementObject *root_element;
1776 PyObject *sought_tag;
1777 int root_done;
1778 int gettext;
1779} ElementIterObject;
1780
1781
1782static void
1783elementiter_dealloc(ElementIterObject *it)
1784{
1785 ParentLocator *p = it->parent_stack;
1786 while (p) {
1787 ParentLocator *temp = p;
1788 Py_XDECREF(p->parent);
1789 p = p->next;
1790 PyObject_Free(temp);
1791 }
1792
1793 Py_XDECREF(it->sought_tag);
1794 Py_XDECREF(it->root_element);
1795
1796 PyObject_GC_UnTrack(it);
1797 PyObject_GC_Del(it);
1798}
1799
1800static int
1801elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1802{
1803 ParentLocator *p = it->parent_stack;
1804 while (p) {
1805 Py_VISIT(p->parent);
1806 p = p->next;
1807 }
1808
1809 Py_VISIT(it->root_element);
1810 Py_VISIT(it->sought_tag);
1811 return 0;
1812}
1813
1814/* Helper function for elementiter_next. Add a new parent to the parent stack.
1815 */
1816static ParentLocator *
1817parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
1818{
1819 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
1820 if (new_node) {
1821 new_node->parent = parent;
1822 Py_INCREF(parent);
1823 new_node->child_index = 0;
1824 new_node->next = stack;
1825 }
1826 return new_node;
1827}
1828
1829static PyObject *
1830elementiter_next(ElementIterObject *it)
1831{
1832 /* Sub-element iterator.
1833 *
1834 * A short note on gettext: this function serves both the iter() and
1835 * itertext() methods to avoid code duplication. However, there are a few
1836 * small differences in the way these iterations work. Namely:
1837 * - itertext() only yields text from nodes that have it, and continues
1838 * iterating when a node doesn't have text (so it doesn't return any
1839 * node like iter())
1840 * - itertext() also has to handle tail, after finishing with all the
1841 * children of a node.
1842 */
Eli Bendersky113da642012-06-15 07:52:49 +03001843 ElementObject *cur_parent;
1844 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001845
1846 while (1) {
1847 /* Handle the case reached in the beginning and end of iteration, where
1848 * the parent stack is empty. The root_done flag gives us indication
1849 * whether we've just started iterating (so root_done is 0), in which
1850 * case the root is returned. If root_done is 1 and we're here, the
1851 * iterator is exhausted.
1852 */
1853 if (!it->parent_stack->parent) {
1854 if (it->root_done) {
1855 PyErr_SetNone(PyExc_StopIteration);
1856 return NULL;
1857 } else {
1858 it->parent_stack = parent_stack_push_new(it->parent_stack,
1859 it->root_element);
1860 if (!it->parent_stack) {
1861 PyErr_NoMemory();
1862 return NULL;
1863 }
1864
1865 it->root_done = 1;
1866 if (it->sought_tag == Py_None ||
1867 PyObject_RichCompareBool(it->root_element->tag,
1868 it->sought_tag, Py_EQ) == 1) {
1869 if (it->gettext) {
1870 PyObject *text = JOIN_OBJ(it->root_element->text);
1871 if (PyObject_IsTrue(text)) {
1872 Py_INCREF(text);
1873 return text;
1874 }
1875 } else {
1876 Py_INCREF(it->root_element);
1877 return (PyObject *)it->root_element;
1878 }
1879 }
1880 }
1881 }
1882
1883 /* See if there are children left to traverse in the current parent. If
1884 * yes, visit the next child. If not, pop the stack and try again.
1885 */
Eli Bendersky113da642012-06-15 07:52:49 +03001886 cur_parent = it->parent_stack->parent;
1887 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03001888 if (cur_parent->extra && child_index < cur_parent->extra->length) {
1889 ElementObject *child = (ElementObject *)
1890 cur_parent->extra->children[child_index];
1891 it->parent_stack->child_index++;
1892 it->parent_stack = parent_stack_push_new(it->parent_stack,
1893 child);
1894 if (!it->parent_stack) {
1895 PyErr_NoMemory();
1896 return NULL;
1897 }
1898
1899 if (it->gettext) {
1900 PyObject *text = JOIN_OBJ(child->text);
1901 if (PyObject_IsTrue(text)) {
1902 Py_INCREF(text);
1903 return text;
1904 }
1905 } else if (it->sought_tag == Py_None ||
1906 PyObject_RichCompareBool(child->tag,
1907 it->sought_tag, Py_EQ) == 1) {
1908 Py_INCREF(child);
1909 return (PyObject *)child;
1910 }
1911 else
1912 continue;
1913 }
1914 else {
1915 PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
1916 ParentLocator *next = it->parent_stack->next;
1917 Py_XDECREF(it->parent_stack->parent);
1918 PyObject_Free(it->parent_stack);
1919 it->parent_stack = next;
1920
1921 /* Note that extra condition on it->parent_stack->parent here;
1922 * this is because itertext() is supposed to only return *inner*
1923 * text, not text following the element it began iteration with.
1924 */
1925 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
1926 Py_INCREF(tail);
1927 return tail;
1928 }
1929 }
1930 }
1931
1932 return NULL;
1933}
1934
1935
1936static PyTypeObject ElementIter_Type = {
1937 PyVarObject_HEAD_INIT(NULL, 0)
1938 "_elementtree._element_iterator", /* tp_name */
1939 sizeof(ElementIterObject), /* tp_basicsize */
1940 0, /* tp_itemsize */
1941 /* methods */
1942 (destructor)elementiter_dealloc, /* tp_dealloc */
1943 0, /* tp_print */
1944 0, /* tp_getattr */
1945 0, /* tp_setattr */
1946 0, /* tp_reserved */
1947 0, /* tp_repr */
1948 0, /* tp_as_number */
1949 0, /* tp_as_sequence */
1950 0, /* tp_as_mapping */
1951 0, /* tp_hash */
1952 0, /* tp_call */
1953 0, /* tp_str */
1954 0, /* tp_getattro */
1955 0, /* tp_setattro */
1956 0, /* tp_as_buffer */
1957 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1958 0, /* tp_doc */
1959 (traverseproc)elementiter_traverse, /* tp_traverse */
1960 0, /* tp_clear */
1961 0, /* tp_richcompare */
1962 0, /* tp_weaklistoffset */
1963 PyObject_SelfIter, /* tp_iter */
1964 (iternextfunc)elementiter_next, /* tp_iternext */
1965 0, /* tp_methods */
1966 0, /* tp_members */
1967 0, /* tp_getset */
1968 0, /* tp_base */
1969 0, /* tp_dict */
1970 0, /* tp_descr_get */
1971 0, /* tp_descr_set */
1972 0, /* tp_dictoffset */
1973 0, /* tp_init */
1974 0, /* tp_alloc */
1975 0, /* tp_new */
1976};
1977
1978
1979static PyObject *
1980create_elementiter(ElementObject *self, PyObject *tag, int gettext)
1981{
1982 ElementIterObject *it;
1983 PyObject *star = NULL;
1984
1985 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
1986 if (!it)
1987 return NULL;
1988 if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
1989 PyObject_GC_Del(it);
1990 return NULL;
1991 }
1992
1993 it->parent_stack->parent = NULL;
1994 it->parent_stack->child_index = 0;
1995 it->parent_stack->next = NULL;
1996
1997 if (PyUnicode_Check(tag))
1998 star = PyUnicode_FromString("*");
1999 else if (PyBytes_Check(tag))
2000 star = PyBytes_FromString("*");
2001
2002 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2003 tag = Py_None;
2004
2005 Py_XDECREF(star);
2006 it->sought_tag = tag;
2007 it->root_done = 0;
2008 it->gettext = gettext;
2009 it->root_element = self;
2010
2011 Py_INCREF(self);
2012 Py_INCREF(tag);
2013
2014 PyObject_GC_Track(it);
2015 return (PyObject *)it;
2016}
2017
2018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019/* ==================================================================== */
2020/* the tree builder type */
2021
2022typedef struct {
2023 PyObject_HEAD
2024
Eli Bendersky58d548d2012-05-29 15:45:16 +03002025 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002026
Eli Bendersky58d548d2012-05-29 15:45:16 +03002027 ElementObject *this; /* current node */
2028 ElementObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002029
Eli Bendersky58d548d2012-05-29 15:45:16 +03002030 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002031
Eli Bendersky58d548d2012-05-29 15:45:16 +03002032 PyObject *stack; /* element stack */
2033 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002034
Eli Bendersky48d358b2012-05-30 17:57:50 +03002035 PyObject *element_factory;
2036
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002038 PyObject *events; /* list of events, or NULL if not collecting */
2039 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2040 PyObject *end_event_obj;
2041 PyObject *start_ns_event_obj;
2042 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002043} TreeBuilderObject;
2044
Neal Norwitz227b5332006-03-22 09:28:35 +00002045static PyTypeObject TreeBuilder_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002046
Christian Heimes90aa7642007-12-19 02:45:37 +00002047#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002048
2049/* -------------------------------------------------------------------- */
2050/* constructor and destructor */
2051
Eli Bendersky58d548d2012-05-29 15:45:16 +03002052static PyObject *
2053treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002054{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002055 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2056 if (t != NULL) {
2057 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002058
Eli Bendersky58d548d2012-05-29 15:45:16 +03002059 Py_INCREF(Py_None);
2060 t->this = (ElementObject *)Py_None;
2061 Py_INCREF(Py_None);
2062 t->last = (ElementObject *)Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002063
Eli Bendersky58d548d2012-05-29 15:45:16 +03002064 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002065 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002066 t->stack = PyList_New(20);
2067 if (!t->stack) {
2068 Py_DECREF(t->this);
2069 Py_DECREF(t->last);
2070 return NULL;
2071 }
2072 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002073
Eli Bendersky58d548d2012-05-29 15:45:16 +03002074 t->events = NULL;
2075 t->start_event_obj = t->end_event_obj = NULL;
2076 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2077 }
2078 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002079}
2080
Eli Bendersky58d548d2012-05-29 15:45:16 +03002081static int
2082treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002083{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002084 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002085 PyObject *element_factory = NULL;
2086 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
2087
2088 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2089 &element_factory)) {
2090 return -1;
2091 }
2092
2093 if (element_factory) {
2094 Py_INCREF(element_factory);
2095 Py_XDECREF(self_tb->element_factory);
2096 self_tb->element_factory = element_factory;
2097 }
2098
Eli Bendersky58d548d2012-05-29 15:45:16 +03002099 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002100}
2101
Eli Bendersky48d358b2012-05-30 17:57:50 +03002102static int
2103treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2104{
2105 Py_VISIT(self->root);
2106 Py_VISIT(self->this);
2107 Py_VISIT(self->last);
2108 Py_VISIT(self->data);
2109 Py_VISIT(self->stack);
2110 Py_VISIT(self->element_factory);
2111 return 0;
2112}
2113
2114static int
2115treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002116{
2117 Py_XDECREF(self->end_ns_event_obj);
2118 Py_XDECREF(self->start_ns_event_obj);
2119 Py_XDECREF(self->end_event_obj);
2120 Py_XDECREF(self->start_event_obj);
2121 Py_XDECREF(self->events);
2122 Py_DECREF(self->stack);
2123 Py_XDECREF(self->data);
2124 Py_DECREF(self->last);
2125 Py_DECREF(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002126 Py_CLEAR(self->element_factory);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002127 Py_XDECREF(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002128 return 0;
2129}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002130
Eli Bendersky48d358b2012-05-30 17:57:50 +03002131static void
2132treebuilder_dealloc(TreeBuilderObject *self)
2133{
2134 PyObject_GC_UnTrack(self);
2135 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002136 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002137}
2138
2139/* -------------------------------------------------------------------- */
2140/* handlers */
2141
2142LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002143treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2144 PyObject* attrib)
2145{
2146 PyObject* node;
2147 PyObject* this;
2148
2149 if (self->data) {
2150 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002151 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002152 self->last->text = JOIN_SET(
2153 self->data, PyList_CheckExact(self->data)
2154 );
2155 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002156 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002157 self->last->tail = JOIN_SET(
2158 self->data, PyList_CheckExact(self->data)
2159 );
2160 }
2161 self->data = NULL;
2162 }
2163
Eli Bendersky48d358b2012-05-30 17:57:50 +03002164 if (self->element_factory) {
2165 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2166 } else {
2167 node = create_new_element(tag, attrib);
2168 }
2169 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002170 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002171 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002172
2173 this = (PyObject*) self->this;
2174
2175 if (this != Py_None) {
2176 if (element_add_subelement((ElementObject*) this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002177 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002178 } else {
2179 if (self->root) {
2180 PyErr_SetString(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002181 elementtree_parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002182 "multiple elements on top level"
2183 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002184 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002185 }
2186 Py_INCREF(node);
2187 self->root = node;
2188 }
2189
2190 if (self->index < PyList_GET_SIZE(self->stack)) {
2191 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002192 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002193 Py_INCREF(this);
2194 } else {
2195 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002196 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002197 }
2198 self->index++;
2199
2200 Py_DECREF(this);
2201 Py_INCREF(node);
2202 self->this = (ElementObject*) node;
2203
2204 Py_DECREF(self->last);
2205 Py_INCREF(node);
2206 self->last = (ElementObject*) node;
2207
2208 if (self->start_event_obj) {
2209 PyObject* res;
2210 PyObject* action = self->start_event_obj;
2211 res = PyTuple_New(2);
2212 if (res) {
2213 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2214 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2215 PyList_Append(self->events, res);
2216 Py_DECREF(res);
2217 } else
2218 PyErr_Clear(); /* FIXME: propagate error */
2219 }
2220
2221 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002222
2223 error:
2224 Py_DECREF(node);
2225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002226}
2227
2228LOCAL(PyObject*)
2229treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2230{
2231 if (!self->data) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002232 if (self->last == (ElementObject*) Py_None) {
2233 /* ignore calls to data before the first call to start */
2234 Py_RETURN_NONE;
2235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002236 /* store the first item as is */
2237 Py_INCREF(data); self->data = data;
2238 } else {
2239 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002240 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2241 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002242 /* expat often generates single character data sections; handle
2243 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002244 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2245 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002246 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002247 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002248 } else if (PyList_CheckExact(self->data)) {
2249 if (PyList_Append(self->data, data) < 0)
2250 return NULL;
2251 } else {
2252 PyObject* list = PyList_New(2);
2253 if (!list)
2254 return NULL;
2255 PyList_SET_ITEM(list, 0, self->data);
2256 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2257 self->data = list;
2258 }
2259 }
2260
2261 Py_RETURN_NONE;
2262}
2263
2264LOCAL(PyObject*)
2265treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2266{
2267 PyObject* item;
2268
2269 if (self->data) {
2270 if (self->this == self->last) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002271 Py_DECREF(JOIN_OBJ(self->last->text));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272 self->last->text = JOIN_SET(
2273 self->data, PyList_CheckExact(self->data)
2274 );
2275 } else {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002276 Py_DECREF(JOIN_OBJ(self->last->tail));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277 self->last->tail = JOIN_SET(
2278 self->data, PyList_CheckExact(self->data)
2279 );
2280 }
2281 self->data = NULL;
2282 }
2283
2284 if (self->index == 0) {
2285 PyErr_SetString(
2286 PyExc_IndexError,
2287 "pop from empty stack"
2288 );
2289 return NULL;
2290 }
2291
2292 self->index--;
2293
2294 item = PyList_GET_ITEM(self->stack, self->index);
2295 Py_INCREF(item);
2296
2297 Py_DECREF(self->last);
2298
2299 self->last = (ElementObject*) self->this;
2300 self->this = (ElementObject*) item;
2301
2302 if (self->end_event_obj) {
2303 PyObject* res;
2304 PyObject* action = self->end_event_obj;
2305 PyObject* node = (PyObject*) self->last;
2306 res = PyTuple_New(2);
2307 if (res) {
2308 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
2309 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
2310 PyList_Append(self->events, res);
2311 Py_DECREF(res);
2312 } else
2313 PyErr_Clear(); /* FIXME: propagate error */
2314 }
2315
2316 Py_INCREF(self->last);
2317 return (PyObject*) self->last;
2318}
2319
2320LOCAL(void)
2321treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002322 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323{
2324 PyObject* res;
2325 PyObject* action;
2326 PyObject* parcel;
2327
2328 if (!self->events)
2329 return;
2330
2331 if (start) {
2332 if (!self->start_ns_event_obj)
2333 return;
2334 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002335 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336 if (!parcel)
2337 return;
2338 Py_INCREF(action);
2339 } else {
2340 if (!self->end_ns_event_obj)
2341 return;
2342 action = self->end_ns_event_obj;
2343 Py_INCREF(action);
2344 parcel = Py_None;
2345 Py_INCREF(parcel);
2346 }
2347
2348 res = PyTuple_New(2);
2349
2350 if (res) {
2351 PyTuple_SET_ITEM(res, 0, action);
2352 PyTuple_SET_ITEM(res, 1, parcel);
2353 PyList_Append(self->events, res);
2354 Py_DECREF(res);
2355 } else
2356 PyErr_Clear(); /* FIXME: propagate error */
2357}
2358
2359/* -------------------------------------------------------------------- */
2360/* methods (in alphabetical order) */
2361
2362static PyObject*
2363treebuilder_data(TreeBuilderObject* self, PyObject* args)
2364{
2365 PyObject* data;
2366 if (!PyArg_ParseTuple(args, "O:data", &data))
2367 return NULL;
2368
2369 return treebuilder_handle_data(self, data);
2370}
2371
2372static PyObject*
2373treebuilder_end(TreeBuilderObject* self, PyObject* args)
2374{
2375 PyObject* tag;
2376 if (!PyArg_ParseTuple(args, "O:end", &tag))
2377 return NULL;
2378
2379 return treebuilder_handle_end(self, tag);
2380}
2381
2382LOCAL(PyObject*)
2383treebuilder_done(TreeBuilderObject* self)
2384{
2385 PyObject* res;
2386
2387 /* FIXME: check stack size? */
2388
2389 if (self->root)
2390 res = self->root;
2391 else
2392 res = Py_None;
2393
2394 Py_INCREF(res);
2395 return res;
2396}
2397
2398static PyObject*
2399treebuilder_close(TreeBuilderObject* self, PyObject* args)
2400{
2401 if (!PyArg_ParseTuple(args, ":close"))
2402 return NULL;
2403
2404 return treebuilder_done(self);
2405}
2406
2407static PyObject*
2408treebuilder_start(TreeBuilderObject* self, PyObject* args)
2409{
2410 PyObject* tag;
2411 PyObject* attrib = Py_None;
2412 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2413 return NULL;
2414
2415 return treebuilder_handle_start(self, tag, attrib);
2416}
2417
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418static PyMethodDef treebuilder_methods[] = {
2419 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2420 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2421 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2423 {NULL, NULL}
2424};
2425
Neal Norwitz227b5332006-03-22 09:28:35 +00002426static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002427 PyVarObject_HEAD_INIT(NULL, 0)
2428 "TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002430 (destructor)treebuilder_dealloc, /* tp_dealloc */
2431 0, /* tp_print */
2432 0, /* tp_getattr */
2433 0, /* tp_setattr */
2434 0, /* tp_reserved */
2435 0, /* tp_repr */
2436 0, /* tp_as_number */
2437 0, /* tp_as_sequence */
2438 0, /* tp_as_mapping */
2439 0, /* tp_hash */
2440 0, /* tp_call */
2441 0, /* tp_str */
2442 0, /* tp_getattro */
2443 0, /* tp_setattro */
2444 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002445 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2446 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002447 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002448 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2449 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002450 0, /* tp_richcompare */
2451 0, /* tp_weaklistoffset */
2452 0, /* tp_iter */
2453 0, /* tp_iternext */
2454 treebuilder_methods, /* tp_methods */
2455 0, /* tp_members */
2456 0, /* tp_getset */
2457 0, /* tp_base */
2458 0, /* tp_dict */
2459 0, /* tp_descr_get */
2460 0, /* tp_descr_set */
2461 0, /* tp_dictoffset */
2462 (initproc)treebuilder_init, /* tp_init */
2463 PyType_GenericAlloc, /* tp_alloc */
2464 treebuilder_new, /* tp_new */
2465 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466};
2467
2468/* ==================================================================== */
2469/* the expat interface */
2470
2471#if defined(USE_EXPAT)
2472
2473#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474#include "pyexpat.h"
Eli Bendersky20d41742012-06-01 09:48:37 +03002475static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477
Eli Bendersky52467b12012-06-01 07:13:08 +03002478static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2479 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2480
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481typedef struct {
2482 PyObject_HEAD
2483
2484 XML_Parser parser;
2485
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002486 PyObject *target;
2487 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002489 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002491 PyObject *handle_start;
2492 PyObject *handle_data;
2493 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002495 PyObject *handle_comment;
2496 PyObject *handle_pi;
2497 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002499 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002500
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501} XMLParserObject;
2502
Neal Norwitz227b5332006-03-22 09:28:35 +00002503static PyTypeObject XMLParser_Type;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002505#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507/* helpers */
2508
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509LOCAL(PyObject*)
2510makeuniversal(XMLParserObject* self, const char* string)
2511{
2512 /* convert a UTF-8 tag/attribute name from the expat parser
2513 to a universal name string */
2514
2515 int size = strlen(string);
2516 PyObject* key;
2517 PyObject* value;
2518
2519 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002520 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 if (!key)
2522 return NULL;
2523
2524 value = PyDict_GetItem(self->names, key);
2525
2526 if (value) {
2527 Py_INCREF(value);
2528 } else {
2529 /* new name. convert to universal name, and decode as
2530 necessary */
2531
2532 PyObject* tag;
2533 char* p;
2534 int i;
2535
2536 /* look for namespace separator */
2537 for (i = 0; i < size; i++)
2538 if (string[i] == '}')
2539 break;
2540 if (i != size) {
2541 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002542 tag = PyBytes_FromStringAndSize(NULL, size+1);
2543 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 p[0] = '{';
2545 memcpy(p+1, string, size);
2546 size++;
2547 } else {
2548 /* plain name; use key as tag */
2549 Py_INCREF(key);
2550 tag = key;
2551 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002552
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002554 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002555 value = PyUnicode_DecodeUTF8(p, size, "strict");
2556 Py_DECREF(tag);
2557 if (!value) {
2558 Py_DECREF(key);
2559 return NULL;
2560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561
2562 /* add to names dictionary */
2563 if (PyDict_SetItem(self->names, key, value) < 0) {
2564 Py_DECREF(key);
2565 Py_DECREF(value);
2566 return NULL;
2567 }
2568 }
2569
2570 Py_DECREF(key);
2571 return value;
2572}
2573
Eli Bendersky5b77d812012-03-16 08:20:05 +02002574/* Set the ParseError exception with the given parameters.
2575 * If message is not NULL, it's used as the error string. Otherwise, the
2576 * message string is the default for the given error_code.
2577*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002578static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002579expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002580{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002581 PyObject *errmsg, *error, *position, *code;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002582
Victor Stinner499dfcf2011-03-21 13:26:24 +01002583 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002584 message ? message : EXPAT(ErrorString)(error_code),
2585 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002586 if (errmsg == NULL)
2587 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002588
Victor Stinner499dfcf2011-03-21 13:26:24 +01002589 error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
2590 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002591 if (!error)
2592 return;
2593
Eli Bendersky5b77d812012-03-16 08:20:05 +02002594 /* Add code and position attributes */
2595 code = PyLong_FromLong((long)error_code);
2596 if (!code) {
2597 Py_DECREF(error);
2598 return;
2599 }
2600 if (PyObject_SetAttrString(error, "code", code) == -1) {
2601 Py_DECREF(error);
2602 Py_DECREF(code);
2603 return;
2604 }
2605 Py_DECREF(code);
2606
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002607 position = Py_BuildValue("(ii)", line, column);
2608 if (!position) {
2609 Py_DECREF(error);
2610 return;
2611 }
2612 if (PyObject_SetAttrString(error, "position", position) == -1) {
2613 Py_DECREF(error);
2614 Py_DECREF(position);
2615 return;
2616 }
2617 Py_DECREF(position);
2618
2619 PyErr_SetObject(elementtree_parseerror_obj, error);
2620 Py_DECREF(error);
2621}
2622
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623/* -------------------------------------------------------------------- */
2624/* handlers */
2625
2626static void
2627expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2628 int data_len)
2629{
2630 PyObject* key;
2631 PyObject* value;
2632 PyObject* res;
2633
2634 if (data_len < 2 || data_in[0] != '&')
2635 return;
2636
Neal Norwitz0269b912007-08-08 06:56:02 +00002637 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638 if (!key)
2639 return;
2640
2641 value = PyDict_GetItem(self->entity, key);
2642
2643 if (value) {
2644 if (TreeBuilder_CheckExact(self->target))
2645 res = treebuilder_handle_data(
2646 (TreeBuilderObject*) self->target, value
2647 );
2648 else if (self->handle_data)
2649 res = PyObject_CallFunction(self->handle_data, "O", value);
2650 else
2651 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002653 } else if (!PyErr_Occurred()) {
2654 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002655 char message[128] = "undefined entity ";
2656 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002657 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002658 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002660 EXPAT(GetErrorColumnNumber)(self->parser),
2661 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 );
2663 }
2664
2665 Py_DECREF(key);
2666}
2667
2668static void
2669expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2670 const XML_Char **attrib_in)
2671{
2672 PyObject* res;
2673 PyObject* tag;
2674 PyObject* attrib;
2675 int ok;
2676
2677 /* tag name */
2678 tag = makeuniversal(self, tag_in);
2679 if (!tag)
2680 return; /* parser will look for errors */
2681
2682 /* attributes */
2683 if (attrib_in[0]) {
2684 attrib = PyDict_New();
2685 if (!attrib)
2686 return;
2687 while (attrib_in[0] && attrib_in[1]) {
2688 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002689 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 if (!key || !value) {
2691 Py_XDECREF(value);
2692 Py_XDECREF(key);
2693 Py_DECREF(attrib);
2694 return;
2695 }
2696 ok = PyDict_SetItem(attrib, key, value);
2697 Py_DECREF(value);
2698 Py_DECREF(key);
2699 if (ok < 0) {
2700 Py_DECREF(attrib);
2701 return;
2702 }
2703 attrib_in += 2;
2704 }
2705 } else {
2706 Py_INCREF(Py_None);
2707 attrib = Py_None;
2708 }
2709
Eli Bendersky48d358b2012-05-30 17:57:50 +03002710 /* If we get None, pass an empty dictionary on */
2711 if (attrib == Py_None) {
2712 Py_DECREF(attrib);
2713 attrib = PyDict_New();
2714 if (!attrib)
2715 return;
2716 }
2717
2718 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 /* shortcut */
2720 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2721 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002722 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002723 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002725 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 res = NULL;
2727
2728 Py_DECREF(tag);
2729 Py_DECREF(attrib);
2730
2731 Py_XDECREF(res);
2732}
2733
2734static void
2735expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2736 int data_len)
2737{
2738 PyObject* data;
2739 PyObject* res;
2740
Neal Norwitz0269b912007-08-08 06:56:02 +00002741 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002742 if (!data)
2743 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
2745 if (TreeBuilder_CheckExact(self->target))
2746 /* shortcut */
2747 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2748 else if (self->handle_data)
2749 res = PyObject_CallFunction(self->handle_data, "O", data);
2750 else
2751 res = NULL;
2752
2753 Py_DECREF(data);
2754
2755 Py_XDECREF(res);
2756}
2757
2758static void
2759expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2760{
2761 PyObject* tag;
2762 PyObject* res = NULL;
2763
2764 if (TreeBuilder_CheckExact(self->target))
2765 /* shortcut */
2766 /* the standard tree builder doesn't look at the end tag */
2767 res = treebuilder_handle_end(
2768 (TreeBuilderObject*) self->target, Py_None
2769 );
2770 else if (self->handle_end) {
2771 tag = makeuniversal(self, tag_in);
2772 if (tag) {
2773 res = PyObject_CallFunction(self->handle_end, "O", tag);
2774 Py_DECREF(tag);
2775 }
2776 }
2777
2778 Py_XDECREF(res);
2779}
2780
2781static void
2782expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2783 const XML_Char *uri)
2784{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002785 PyObject* sprefix = NULL;
2786 PyObject* suri = NULL;
2787
2788 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
2789 if (!suri)
2790 return;
2791
2792 if (prefix)
2793 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
2794 else
2795 sprefix = PyUnicode_FromString("");
2796 if (!sprefix) {
2797 Py_DECREF(suri);
2798 return;
2799 }
2800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002802 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002804
2805 Py_DECREF(sprefix);
2806 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807}
2808
2809static void
2810expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2811{
2812 treebuilder_handle_namespace(
2813 (TreeBuilderObject*) self->target, 0, NULL, NULL
2814 );
2815}
2816
2817static void
2818expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2819{
2820 PyObject* comment;
2821 PyObject* res;
2822
2823 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002824 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 if (comment) {
2826 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2827 Py_XDECREF(res);
2828 Py_DECREF(comment);
2829 }
2830 }
2831}
2832
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002833static void
2834expat_start_doctype_handler(XMLParserObject *self,
2835 const XML_Char *doctype_name,
2836 const XML_Char *sysid,
2837 const XML_Char *pubid,
2838 int has_internal_subset)
2839{
2840 PyObject *self_pyobj = (PyObject *)self;
2841 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
2842 PyObject *parser_doctype = NULL;
2843 PyObject *res = NULL;
2844
2845 doctype_name_obj = makeuniversal(self, doctype_name);
2846 if (!doctype_name_obj)
2847 return;
2848
2849 if (sysid) {
2850 sysid_obj = makeuniversal(self, sysid);
2851 if (!sysid_obj) {
2852 Py_DECREF(doctype_name_obj);
2853 return;
2854 }
2855 } else {
2856 Py_INCREF(Py_None);
2857 sysid_obj = Py_None;
2858 }
2859
2860 if (pubid) {
2861 pubid_obj = makeuniversal(self, pubid);
2862 if (!pubid_obj) {
2863 Py_DECREF(doctype_name_obj);
2864 Py_DECREF(sysid_obj);
2865 return;
2866 }
2867 } else {
2868 Py_INCREF(Py_None);
2869 pubid_obj = Py_None;
2870 }
2871
2872 /* If the target has a handler for doctype, call it. */
2873 if (self->handle_doctype) {
2874 res = PyObject_CallFunction(self->handle_doctype, "OOO",
2875 doctype_name_obj, pubid_obj, sysid_obj);
2876 Py_CLEAR(res);
2877 }
2878
2879 /* Now see if the parser itself has a doctype method. If yes and it's
2880 * a subclass, call it but warn about deprecation. If it's not a subclass
2881 * (i.e. vanilla XMLParser), do nothing.
2882 */
2883 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
2884 if (parser_doctype) {
2885 if (!XMLParser_CheckExact(self_pyobj)) {
2886 if (PyErr_WarnEx(PyExc_DeprecationWarning,
2887 "This method of XMLParser is deprecated. Define"
2888 " doctype() method on the TreeBuilder target.",
2889 1) < 0) {
2890 goto clear;
2891 }
2892 res = PyObject_CallFunction(parser_doctype, "OOO",
2893 doctype_name_obj, pubid_obj, sysid_obj);
2894 Py_CLEAR(res);
2895 }
2896 }
2897
2898clear:
2899 Py_XDECREF(parser_doctype);
2900 Py_DECREF(doctype_name_obj);
2901 Py_DECREF(pubid_obj);
2902 Py_DECREF(sysid_obj);
2903}
2904
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002905static void
2906expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2907 const XML_Char* data_in)
2908{
2909 PyObject* target;
2910 PyObject* data;
2911 PyObject* res;
2912
2913 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00002914 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2915 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916 if (target && data) {
2917 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2918 Py_XDECREF(res);
2919 Py_DECREF(data);
2920 Py_DECREF(target);
2921 } else {
2922 Py_XDECREF(data);
2923 Py_XDECREF(target);
2924 }
2925 }
2926}
2927
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928static int
2929expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2930 XML_Encoding *info)
2931{
2932 PyObject* u;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 unsigned char s[256];
2934 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002935 void *data;
2936 unsigned int kind;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937
2938 memset(info, 0, sizeof(XML_Encoding));
2939
2940 for (i = 0; i < 256; i++)
2941 s[i] = i;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002942
Fredrik Lundhc3389992005-12-25 11:40:19 +00002943 u = PyUnicode_Decode((char*) s, 256, name, "replace");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 if (!u)
2945 return XML_STATUS_ERROR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002946 if (PyUnicode_READY(u))
2947 return XML_STATUS_ERROR;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002949 if (PyUnicode_GET_LENGTH(u) != 256) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 Py_DECREF(u);
2951 return XML_STATUS_ERROR;
2952 }
2953
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002954 kind = PyUnicode_KIND(u);
2955 data = PyUnicode_DATA(u);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 for (i = 0; i < 256; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002957 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2958 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
2959 info->map[i] = ch;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 else
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002961 info->map[i] = -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 }
2963
2964 Py_DECREF(u);
2965
2966 return XML_STATUS_OK;
2967}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968
2969/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970
Eli Bendersky52467b12012-06-01 07:13:08 +03002971static PyObject *
2972xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973{
Eli Bendersky52467b12012-06-01 07:13:08 +03002974 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
2975 if (self) {
2976 self->parser = NULL;
2977 self->target = self->entity = self->names = NULL;
2978 self->handle_start = self->handle_data = self->handle_end = NULL;
2979 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002980 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 }
Eli Bendersky52467b12012-06-01 07:13:08 +03002982 return (PyObject *)self;
2983}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984
Eli Bendersky52467b12012-06-01 07:13:08 +03002985static int
2986xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
2987{
2988 XMLParserObject *self_xp = (XMLParserObject *)self;
2989 PyObject *target = NULL, *html = NULL;
2990 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03002991 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992
Eli Bendersky52467b12012-06-01 07:13:08 +03002993 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
2994 &html, &target, &encoding)) {
2995 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002997
Eli Bendersky52467b12012-06-01 07:13:08 +03002998 self_xp->entity = PyDict_New();
2999 if (!self_xp->entity)
3000 return -1;
3001
3002 self_xp->names = PyDict_New();
3003 if (!self_xp->names) {
3004 Py_XDECREF(self_xp->entity);
3005 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 }
3007
Eli Bendersky52467b12012-06-01 07:13:08 +03003008 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3009 if (!self_xp->parser) {
3010 Py_XDECREF(self_xp->entity);
3011 Py_XDECREF(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003013 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 }
3015
Eli Bendersky52467b12012-06-01 07:13:08 +03003016 if (target) {
3017 Py_INCREF(target);
3018 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003019 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020 if (!target) {
Eli Bendersky52467b12012-06-01 07:13:08 +03003021 Py_XDECREF(self_xp->entity);
3022 Py_XDECREF(self_xp->names);
3023 EXPAT(ParserFree)(self_xp->parser);
3024 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003026 }
3027 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028
Eli Bendersky52467b12012-06-01 07:13:08 +03003029 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3030 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3031 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3032 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3033 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3034 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003035 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036
3037 PyErr_Clear();
Eli Bendersky52467b12012-06-01 07:13:08 +03003038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003040 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003042 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043 (XML_StartElementHandler) expat_start_handler,
3044 (XML_EndElementHandler) expat_end_handler
3045 );
3046 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003047 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048 (XML_DefaultHandler) expat_default_handler
3049 );
3050 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003051 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 (XML_CharacterDataHandler) expat_data_handler
3053 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003054 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003056 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057 (XML_CommentHandler) expat_comment_handler
3058 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003059 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003061 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 (XML_ProcessingInstructionHandler) expat_pi_handler
3063 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003064 EXPAT(SetStartDoctypeDeclHandler)(
3065 self_xp->parser,
3066 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3067 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003069 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
3071 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072
Eli Bendersky52467b12012-06-01 07:13:08 +03003073 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074}
3075
Eli Bendersky52467b12012-06-01 07:13:08 +03003076static int
3077xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3078{
3079 Py_VISIT(self->handle_close);
3080 Py_VISIT(self->handle_pi);
3081 Py_VISIT(self->handle_comment);
3082 Py_VISIT(self->handle_end);
3083 Py_VISIT(self->handle_data);
3084 Py_VISIT(self->handle_start);
3085
3086 Py_VISIT(self->target);
3087 Py_VISIT(self->entity);
3088 Py_VISIT(self->names);
3089
3090 return 0;
3091}
3092
3093static int
3094xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095{
3096 EXPAT(ParserFree)(self->parser);
3097
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003098 Py_XDECREF(self->handle_close);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099 Py_XDECREF(self->handle_pi);
3100 Py_XDECREF(self->handle_comment);
3101 Py_XDECREF(self->handle_end);
3102 Py_XDECREF(self->handle_data);
3103 Py_XDECREF(self->handle_start);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003104 Py_XDECREF(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105
Eli Bendersky52467b12012-06-01 07:13:08 +03003106 Py_XDECREF(self->target);
3107 Py_XDECREF(self->entity);
3108 Py_XDECREF(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109
Eli Bendersky52467b12012-06-01 07:13:08 +03003110 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111}
3112
Eli Bendersky52467b12012-06-01 07:13:08 +03003113static void
3114xmlparser_dealloc(XMLParserObject* self)
3115{
3116 PyObject_GC_UnTrack(self);
3117 xmlparser_gc_clear(self);
3118 Py_TYPE(self)->tp_free((PyObject *)self);
3119}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120
3121LOCAL(PyObject*)
3122expat_parse(XMLParserObject* self, char* data, int data_len, int final)
3123{
3124 int ok;
3125
3126 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3127
3128 if (PyErr_Occurred())
3129 return NULL;
3130
3131 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003132 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003133 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003135 EXPAT(GetErrorColumnNumber)(self->parser),
3136 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137 );
3138 return NULL;
3139 }
3140
3141 Py_RETURN_NONE;
3142}
3143
3144static PyObject*
3145xmlparser_close(XMLParserObject* self, PyObject* args)
3146{
3147 /* end feeding data to parser */
3148
3149 PyObject* res;
3150 if (!PyArg_ParseTuple(args, ":close"))
3151 return NULL;
3152
3153 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003154 if (!res)
3155 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003157 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158 Py_DECREF(res);
3159 return treebuilder_done((TreeBuilderObject*) self->target);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003160 } if (self->handle_close) {
3161 Py_DECREF(res);
3162 return PyObject_CallFunction(self->handle_close, "");
3163 } else
3164 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003165}
3166
3167static PyObject*
3168xmlparser_feed(XMLParserObject* self, PyObject* args)
3169{
3170 /* feed data to parser */
3171
3172 char* data;
3173 int data_len;
3174 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
3175 return NULL;
3176
3177 return expat_parse(self, data, data_len, 0);
3178}
3179
3180static PyObject*
3181xmlparser_parse(XMLParserObject* self, PyObject* args)
3182{
3183 /* (internal) parse until end of input stream */
3184
3185 PyObject* reader;
3186 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003187 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 PyObject* res;
3189
3190 PyObject* fileobj;
3191 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3192 return NULL;
3193
3194 reader = PyObject_GetAttrString(fileobj, "read");
3195 if (!reader)
3196 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198 /* read from open file object */
3199 for (;;) {
3200
3201 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3202
3203 if (!buffer) {
3204 /* read failed (e.g. due to KeyboardInterrupt) */
3205 Py_DECREF(reader);
3206 return NULL;
3207 }
3208
Eli Benderskyf996e772012-03-16 05:53:30 +02003209 if (PyUnicode_CheckExact(buffer)) {
3210 /* A unicode object is encoded into bytes using UTF-8 */
3211 if (PyUnicode_GET_SIZE(buffer) == 0) {
3212 Py_DECREF(buffer);
3213 break;
3214 }
3215 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3216 if (!temp) {
3217 /* Propagate exception from PyUnicode_AsEncodedString */
3218 Py_DECREF(buffer);
3219 Py_DECREF(reader);
3220 return NULL;
3221 }
3222
3223 /* Here we no longer need the original buffer since it contains
3224 * unicode. Make it point to the encoded bytes object.
3225 */
3226 Py_DECREF(buffer);
3227 buffer = temp;
3228 }
3229 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230 Py_DECREF(buffer);
3231 break;
3232 }
3233
3234 res = expat_parse(
Christian Heimes72b710a2008-05-26 13:28:38 +00003235 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 );
3237
3238 Py_DECREF(buffer);
3239
3240 if (!res) {
3241 Py_DECREF(reader);
3242 return NULL;
3243 }
3244 Py_DECREF(res);
3245
3246 }
3247
3248 Py_DECREF(reader);
3249
3250 res = expat_parse(self, "", 0, 1);
3251
3252 if (res && TreeBuilder_CheckExact(self->target)) {
3253 Py_DECREF(res);
3254 return treebuilder_done((TreeBuilderObject*) self->target);
3255 }
3256
3257 return res;
3258}
3259
3260static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003261xmlparser_doctype(XMLParserObject *self, PyObject *args)
3262{
3263 Py_RETURN_NONE;
3264}
3265
3266static PyObject*
3267xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268{
3269 /* activate element event reporting */
3270
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003271 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 TreeBuilderObject* target;
3273
3274 PyObject* events; /* event collector */
3275 PyObject* event_set = Py_None;
3276 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
3277 &event_set))
3278 return NULL;
3279
3280 if (!TreeBuilder_CheckExact(self->target)) {
3281 PyErr_SetString(
3282 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003283 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 "targets"
3285 );
3286 return NULL;
3287 }
3288
3289 target = (TreeBuilderObject*) self->target;
3290
3291 Py_INCREF(events);
3292 Py_XDECREF(target->events);
3293 target->events = events;
3294
3295 /* clear out existing events */
3296 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
3297 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
3298 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
3299 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
3300
3301 if (event_set == Py_None) {
3302 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003303 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 Py_RETURN_NONE;
3305 }
3306
3307 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
3308 goto error;
3309
3310 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
3311 PyObject* item = PyTuple_GET_ITEM(event_set, i);
3312 char* event;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003313 if (PyUnicode_Check(item)) {
3314 event = _PyUnicode_AsString(item);
Victor Stinner0477bf32010-03-22 12:11:44 +00003315 if (event == NULL)
3316 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003317 } else if (PyBytes_Check(item))
3318 event = PyBytes_AS_STRING(item);
3319 else {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 goto error;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003321 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322 if (strcmp(event, "start") == 0) {
3323 Py_INCREF(item);
3324 target->start_event_obj = item;
3325 } else if (strcmp(event, "end") == 0) {
3326 Py_INCREF(item);
3327 Py_XDECREF(target->end_event_obj);
3328 target->end_event_obj = item;
3329 } else if (strcmp(event, "start-ns") == 0) {
3330 Py_INCREF(item);
3331 Py_XDECREF(target->start_ns_event_obj);
3332 target->start_ns_event_obj = item;
3333 EXPAT(SetNamespaceDeclHandler)(
3334 self->parser,
3335 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3336 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3337 );
3338 } else if (strcmp(event, "end-ns") == 0) {
3339 Py_INCREF(item);
3340 Py_XDECREF(target->end_ns_event_obj);
3341 target->end_ns_event_obj = item;
3342 EXPAT(SetNamespaceDeclHandler)(
3343 self->parser,
3344 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3345 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3346 );
3347 } else {
3348 PyErr_Format(
3349 PyExc_ValueError,
3350 "unknown event '%s'", event
3351 );
3352 return NULL;
3353 }
3354 }
3355
3356 Py_RETURN_NONE;
3357
3358 error:
3359 PyErr_SetString(
3360 PyExc_TypeError,
3361 "invalid event tuple"
3362 );
3363 return NULL;
3364}
3365
3366static PyMethodDef xmlparser_methods[] = {
3367 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
3368 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
3369 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
3370 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003371 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372 {NULL, NULL}
3373};
3374
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003375static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003376xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003378 if (PyUnicode_Check(nameobj)) {
3379 PyObject* res;
3380 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3381 res = self->entity;
3382 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3383 res = self->target;
3384 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3385 return PyUnicode_FromFormat(
3386 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003388 }
3389 else
3390 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391
Alexander Belopolskye239d232010-12-08 23:31:48 +00003392 Py_INCREF(res);
3393 return res;
3394 }
3395 generic:
3396 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397}
3398
Neal Norwitz227b5332006-03-22 09:28:35 +00003399static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003400 PyVarObject_HEAD_INIT(NULL, 0)
3401 "XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003403 (destructor)xmlparser_dealloc, /* tp_dealloc */
3404 0, /* tp_print */
3405 0, /* tp_getattr */
3406 0, /* tp_setattr */
3407 0, /* tp_reserved */
3408 0, /* tp_repr */
3409 0, /* tp_as_number */
3410 0, /* tp_as_sequence */
3411 0, /* tp_as_mapping */
3412 0, /* tp_hash */
3413 0, /* tp_call */
3414 0, /* tp_str */
3415 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3416 0, /* tp_setattro */
3417 0, /* tp_as_buffer */
3418 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3419 /* tp_flags */
3420 0, /* tp_doc */
3421 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3422 (inquiry)xmlparser_gc_clear, /* tp_clear */
3423 0, /* tp_richcompare */
3424 0, /* tp_weaklistoffset */
3425 0, /* tp_iter */
3426 0, /* tp_iternext */
3427 xmlparser_methods, /* tp_methods */
3428 0, /* tp_members */
3429 0, /* tp_getset */
3430 0, /* tp_base */
3431 0, /* tp_dict */
3432 0, /* tp_descr_get */
3433 0, /* tp_descr_set */
3434 0, /* tp_dictoffset */
3435 (initproc)xmlparser_init, /* tp_init */
3436 PyType_GenericAlloc, /* tp_alloc */
3437 xmlparser_new, /* tp_new */
3438 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439};
3440
3441#endif
3442
3443/* ==================================================================== */
3444/* python module interface */
3445
3446static PyMethodDef _functions[] = {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003448 {NULL, NULL}
3449};
3450
Martin v. Löwis1a214512008-06-11 05:26:20 +00003451
3452static struct PyModuleDef _elementtreemodule = {
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003453 PyModuleDef_HEAD_INIT,
3454 "_elementtree",
3455 NULL,
3456 -1,
3457 _functions,
3458 NULL,
3459 NULL,
3460 NULL,
3461 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003462};
3463
Neal Norwitzf6657e62006-12-28 04:47:50 +00003464PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003465PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003467 PyObject *m, *temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003469 /* Initialize object types */
3470 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003471 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003472 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003473 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474#if defined(USE_EXPAT)
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003475 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003476 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003477#endif
3478
Martin v. Löwis1a214512008-06-11 05:26:20 +00003479 m = PyModule_Create(&_elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003480 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003481 return NULL;
3482
Eli Bendersky828efde2012-04-05 05:40:58 +03003483 if (!(temp = PyImport_ImportModule("copy")))
3484 return NULL;
3485 elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
3486 Py_XDECREF(temp);
3487
3488 if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
3489 return NULL;
3490
Eli Bendersky20d41742012-06-01 09:48:37 +03003491 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003492 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3493 if (expat_capi) {
3494 /* check that it's usable */
3495 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3496 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3497 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3498 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003499 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003500 expat_capi = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003501 }
3502 }
3503 if (!expat_capi) {
3504 PyErr_SetString(
3505 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
3506 );
3507 return NULL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003508 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 elementtree_parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003511 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003512 );
3513 Py_INCREF(elementtree_parseerror_obj);
3514 PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
3515
Eli Bendersky092af1f2012-03-04 07:14:03 +02003516 Py_INCREF((PyObject *)&Element_Type);
3517 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3518
Eli Bendersky58d548d2012-05-29 15:45:16 +03003519 Py_INCREF((PyObject *)&TreeBuilder_Type);
3520 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3521
Eli Bendersky52467b12012-06-01 07:13:08 +03003522#if defined(USE_EXPAT)
3523 Py_INCREF((PyObject *)&XMLParser_Type);
3524 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
3525#endif
3526
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003527 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003528}