| Eli Bendersky | bf05df2 | 2013-04-20 05:44:01 -0700 | [diff] [blame] | 1 | /*-------------------------------------------------------------------- | 
 | 2 |  * Licensed to PSF under a Contributor Agreement. | 
 | 3 |  * See http://www.python.org/psf/license for licensing details. | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 4 |  * | 
| Eli Bendersky | bf05df2 | 2013-04-20 05:44:01 -0700 | [diff] [blame] | 5 |  * _elementtree - C accelerator for xml.etree.ElementTree | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 6 |  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved. | 
 | 7 |  * Copyright (c) 1999-2009 by Fredrik Lundh. | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 8 |  * | 
 | 9 |  * info@pythonware.com | 
 | 10 |  * http://www.pythonware.com | 
| Eli Bendersky | bf05df2 | 2013-04-20 05:44:01 -0700 | [diff] [blame] | 11 |  *-------------------------------------------------------------------- | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 12 |  */ | 
 | 13 |  | 
 | 14 | #include "Python.h" | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 15 | #include "structmember.h" | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 16 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 17 | /* -------------------------------------------------------------------- */ | 
 | 18 | /* configuration */ | 
 | 19 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 20 | /* An element can hold this many children without extra memory | 
 | 21 |    allocations. */ | 
 | 22 | #define STATIC_CHILDREN 4 | 
 | 23 |  | 
/* For best performance, choose a value so that 80-90% of all nodes
 | 25 |    have no more than the given number of children.  Set this to zero | 
 | 26 |    to minimize the size of the element structure itself (this only | 
 | 27 |    helps if you have lots of leaf nodes with attributes). */ | 
 | 28 |  | 
 | 29 | /* Also note that pymalloc always allocates blocks in multiples of | 
| Florent Xicluna | a72a98f | 2012-02-13 11:03:30 +0100 | [diff] [blame] | 30 |    eight bytes.  For the current C version of ElementTree, this means | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 31 |    that the number of children should be an even number, at least on | 
 | 32 |    32-bit platforms. */ | 
 | 33 |  | 
 | 34 | /* -------------------------------------------------------------------- */ | 
 | 35 |  | 
 | 36 | #if 0 | 
 | 37 | static int memory = 0; | 
 | 38 | #define ALLOC(size, comment)\ | 
 | 39 | do { memory += size; printf("%8d - %s\n", memory, comment); } while (0) | 
 | 40 | #define RELEASE(size, comment)\ | 
 | 41 | do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) | 
 | 42 | #else | 
 | 43 | #define ALLOC(size, comment) | 
 | 44 | #define RELEASE(size, comment) | 
 | 45 | #endif | 
 | 46 |  | 
 | 47 | /* compiler tweaks */ | 
 | 48 | #if defined(_MSC_VER) | 
 | 49 | #define LOCAL(type) static __inline type __fastcall | 
 | 50 | #else | 
 | 51 | #define LOCAL(type) static type | 
 | 52 | #endif | 
 | 53 |  | 
/* Macros used to store a 'join' flag in the low bit of string object
   pointers.  Note that all use of text and tail as object pointers must be
   wrapped in JOIN_OBJ.  See the comments in the ElementObject definition for
   more info.

     JOIN_OBJ(p)        the pointer with the low bit masked off
     JOIN_GET(p)        the low bit of the pointer (0 or 1)
     JOIN_SET(p, flag)  the pointer with the low bit cleared, then or-ed
                        with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 61 |  | 
 | 62 | /* glue functions (see the init function for details) */ | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 63 | static PyObject* elementtree_parseerror_obj; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 64 | static PyObject* elementtree_deepcopy_obj; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 65 | static PyObject* elementpath_obj; | 
 | 66 |  | 
 | 67 | /* helpers */ | 
 | 68 |  | 
 | 69 | LOCAL(PyObject*) | 
 | 70 | deepcopy(PyObject* object, PyObject* memo) | 
 | 71 | { | 
 | 72 |     /* do a deep copy of the given object */ | 
 | 73 |  | 
 | 74 |     PyObject* args; | 
 | 75 |     PyObject* result; | 
 | 76 |  | 
 | 77 |     if (!elementtree_deepcopy_obj) { | 
 | 78 |         PyErr_SetString( | 
 | 79 |             PyExc_RuntimeError, | 
 | 80 |             "deepcopy helper not found" | 
 | 81 |             ); | 
 | 82 |         return NULL; | 
 | 83 |     } | 
 | 84 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 85 |     args = PyTuple_Pack(2, object, memo); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 86 |     if (!args) | 
 | 87 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 88 |     result = PyObject_CallObject(elementtree_deepcopy_obj, args); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 89 |     Py_DECREF(args); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 90 |     return result; | 
 | 91 | } | 
 | 92 |  | 
 | 93 | LOCAL(PyObject*) | 
 | 94 | list_join(PyObject* list) | 
 | 95 | { | 
 | 96 |     /* join list elements (destroying the list in the process) */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 97 |     PyObject* joiner; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 98 |     PyObject* result; | 
 | 99 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 100 |     joiner = PyUnicode_FromStringAndSize("", 0); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 101 |     if (!joiner) | 
 | 102 |         return NULL; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 103 |     result = PyUnicode_Join(joiner, list); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 104 |     Py_DECREF(joiner); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 105 |     if (result) | 
 | 106 |         Py_DECREF(list); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 107 |     return result; | 
 | 108 | } | 
 | 109 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 110 | /* Is the given object an empty dictionary? | 
 | 111 | */ | 
 | 112 | static int | 
 | 113 | is_empty_dict(PyObject *obj) | 
 | 114 | { | 
 | 115 |     return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0; | 
 | 116 | } | 
 | 117 |  | 
 | 118 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 119 | /* -------------------------------------------------------------------- */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 120 | /* the Element type */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 121 |  | 
 | 122 | typedef struct { | 
 | 123 |  | 
 | 124 |     /* attributes (a dictionary object), or None if no attributes */ | 
 | 125 |     PyObject* attrib; | 
 | 126 |  | 
 | 127 |     /* child elements */ | 
 | 128 |     int length; /* actual number of items */ | 
 | 129 |     int allocated; /* allocated items */ | 
 | 130 |  | 
 | 131 |     /* this either points to _children or to a malloced buffer */ | 
 | 132 |     PyObject* *children; | 
 | 133 |  | 
 | 134 |     PyObject* _children[STATIC_CHILDREN]; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 135 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 136 | } ElementObjectExtra; | 
 | 137 |  | 
 | 138 | typedef struct { | 
 | 139 |     PyObject_HEAD | 
 | 140 |  | 
 | 141 |     /* element tag (a string). */ | 
 | 142 |     PyObject* tag; | 
 | 143 |  | 
 | 144 |     /* text before first child.  note that this is a tagged pointer; | 
 | 145 |        use JOIN_OBJ to get the object pointer.  the join flag is used | 
 | 146 |        to distinguish lists created by the tree builder from lists | 
 | 147 |        assigned to the attribute by application code; the former | 
 | 148 |        should be joined before being returned to the user, the latter | 
 | 149 |        should be left intact. */ | 
 | 150 |     PyObject* text; | 
 | 151 |  | 
 | 152 |     /* text after this element, in parent.  note that this is a tagged | 
 | 153 |        pointer; use JOIN_OBJ to get the object pointer. */ | 
 | 154 |     PyObject* tail; | 
 | 155 |  | 
 | 156 |     ElementObjectExtra* extra; | 
 | 157 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 158 |     PyObject *weakreflist; /* For tp_weaklistoffset */ | 
 | 159 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 160 | } ElementObject; | 
 | 161 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 162 | static PyTypeObject Element_Type; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 163 |  | 
| Christian Heimes | 90aa764 | 2007-12-19 02:45:37 +0000 | [diff] [blame] | 164 | #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 165 |  | 
 | 166 | /* -------------------------------------------------------------------- */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 167 | /* Element constructors and destructor */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 168 |  | 
 | 169 | LOCAL(int) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 170 | create_extra(ElementObject* self, PyObject* attrib) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 171 | { | 
 | 172 |     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); | 
 | 173 |     if (!self->extra) | 
 | 174 |         return -1; | 
 | 175 |  | 
 | 176 |     if (!attrib) | 
 | 177 |         attrib = Py_None; | 
 | 178 |  | 
 | 179 |     Py_INCREF(attrib); | 
 | 180 |     self->extra->attrib = attrib; | 
 | 181 |  | 
 | 182 |     self->extra->length = 0; | 
 | 183 |     self->extra->allocated = STATIC_CHILDREN; | 
 | 184 |     self->extra->children = self->extra->_children; | 
 | 185 |  | 
 | 186 |     return 0; | 
 | 187 | } | 
 | 188 |  | 
 | 189 | LOCAL(void) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 190 | dealloc_extra(ElementObject* self) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 191 | { | 
| Eli Bendersky | 08b8529 | 2012-04-04 15:55:07 +0300 | [diff] [blame] | 192 |     ElementObjectExtra *myextra; | 
 | 193 |     int i; | 
 | 194 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 195 |     if (!self->extra) | 
 | 196 |         return; | 
 | 197 |  | 
 | 198 |     /* Avoid DECREFs calling into this code again (cycles, etc.) | 
 | 199 |     */ | 
| Eli Bendersky | 08b8529 | 2012-04-04 15:55:07 +0300 | [diff] [blame] | 200 |     myextra = self->extra; | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 201 |     self->extra = NULL; | 
 | 202 |  | 
 | 203 |     Py_DECREF(myextra->attrib); | 
 | 204 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 205 |     for (i = 0; i < myextra->length; i++) | 
 | 206 |         Py_DECREF(myextra->children[i]); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 207 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 208 |     if (myextra->children != myextra->_children) | 
 | 209 |         PyObject_Free(myextra->children); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 210 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 211 |     PyObject_Free(myextra); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 212 | } | 
 | 213 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 214 | /* Convenience internal function to create new Element objects with the given | 
 | 215 |  * tag and attributes. | 
 | 216 | */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 217 | LOCAL(PyObject*) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 218 | create_new_element(PyObject* tag, PyObject* attrib) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 219 | { | 
 | 220 |     ElementObject* self; | 
 | 221 |  | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 222 |     self = PyObject_GC_New(ElementObject, &Element_Type); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 223 |     if (self == NULL) | 
 | 224 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 225 |     self->extra = NULL; | 
 | 226 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 227 |     if (attrib != Py_None && !is_empty_dict(attrib)) { | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 228 |         if (create_extra(self, attrib) < 0) { | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 229 |             PyObject_Del(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 230 |             return NULL; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 231 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 232 |     } | 
 | 233 |  | 
 | 234 |     Py_INCREF(tag); | 
 | 235 |     self->tag = tag; | 
 | 236 |  | 
 | 237 |     Py_INCREF(Py_None); | 
 | 238 |     self->text = Py_None; | 
 | 239 |  | 
 | 240 |     Py_INCREF(Py_None); | 
 | 241 |     self->tail = Py_None; | 
 | 242 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 243 |     self->weakreflist = NULL; | 
 | 244 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 245 |     ALLOC(sizeof(ElementObject), "create element"); | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 246 |     PyObject_GC_Track(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 247 |     return (PyObject*) self; | 
 | 248 | } | 
 | 249 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 250 | static PyObject * | 
 | 251 | element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 252 | { | 
 | 253 |     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0); | 
 | 254 |     if (e != NULL) { | 
 | 255 |         Py_INCREF(Py_None); | 
 | 256 |         e->tag = Py_None; | 
 | 257 |  | 
 | 258 |         Py_INCREF(Py_None); | 
 | 259 |         e->text = Py_None; | 
 | 260 |  | 
 | 261 |         Py_INCREF(Py_None); | 
 | 262 |         e->tail = Py_None; | 
 | 263 |  | 
 | 264 |         e->extra = NULL; | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 265 |         e->weakreflist = NULL; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 266 |     } | 
 | 267 |     return (PyObject *)e; | 
 | 268 | } | 
 | 269 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 270 | /* Helper function for extracting the attrib dictionary from a keywords dict. | 
 | 271 |  * This is required by some constructors/functions in this module that can | 
| Eli Bendersky | 4583990 | 2013-01-13 05:14:47 -0800 | [diff] [blame] | 272 |  * either accept attrib as a keyword argument or all attributes splashed | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 273 |  * directly into *kwds. | 
| Eli Bendersky | d4cb4b7 | 2013-04-22 05:25:25 -0700 | [diff] [blame] | 274 |  * | 
 | 275 |  * Return a dictionary with the content of kwds merged into the content of | 
 | 276 |  * attrib. If there is no attrib keyword, return a copy of kwds. | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 277 |  */ | 
 | 278 | static PyObject* | 
 | 279 | get_attrib_from_keywords(PyObject *kwds) | 
 | 280 | { | 
| Eli Bendersky | 45f3d2f | 2013-04-24 05:34:07 -0700 | [diff] [blame] | 281 |     PyObject *attrib_str = PyUnicode_FromString("attrib"); | 
 | 282 |     PyObject *attrib = PyDict_GetItem(kwds, attrib_str); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 283 |  | 
 | 284 |     if (attrib) { | 
 | 285 |         /* If attrib was found in kwds, copy its value and remove it from | 
 | 286 |          * kwds | 
 | 287 |          */ | 
 | 288 |         if (!PyDict_Check(attrib)) { | 
| Eli Bendersky | 45f3d2f | 2013-04-24 05:34:07 -0700 | [diff] [blame] | 289 |             Py_DECREF(attrib_str); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 290 |             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", | 
 | 291 |                          Py_TYPE(attrib)->tp_name); | 
 | 292 |             return NULL; | 
 | 293 |         } | 
 | 294 |         attrib = PyDict_Copy(attrib); | 
| Eli Bendersky | 45f3d2f | 2013-04-24 05:34:07 -0700 | [diff] [blame] | 295 |         PyDict_DelItem(kwds, attrib_str); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 296 |     } else { | 
 | 297 |         attrib = PyDict_New(); | 
 | 298 |     } | 
| Eli Bendersky | 45f3d2f | 2013-04-24 05:34:07 -0700 | [diff] [blame] | 299 |  | 
 | 300 |     Py_DECREF(attrib_str); | 
 | 301 |  | 
 | 302 |     /* attrib can be NULL if PyDict_New failed */ | 
 | 303 |     if (attrib) | 
 | 304 |         PyDict_Update(attrib, kwds); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 305 |     return attrib; | 
 | 306 | } | 
 | 307 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 308 | static int | 
 | 309 | element_init(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 310 | { | 
 | 311 |     PyObject *tag; | 
 | 312 |     PyObject *tmp; | 
 | 313 |     PyObject *attrib = NULL; | 
 | 314 |     ElementObject *self_elem; | 
 | 315 |  | 
 | 316 |     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) | 
 | 317 |         return -1; | 
 | 318 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 319 |     if (attrib) { | 
 | 320 |         /* attrib passed as positional arg */ | 
 | 321 |         attrib = PyDict_Copy(attrib); | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 322 |         if (!attrib) | 
 | 323 |             return -1; | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 324 |         if (kwds) { | 
 | 325 |             if (PyDict_Update(attrib, kwds) < 0) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 326 |                 Py_DECREF(attrib); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 327 |                 return -1; | 
 | 328 |             } | 
 | 329 |         } | 
 | 330 |     } else if (kwds) { | 
 | 331 |         /* have keywords args */ | 
 | 332 |         attrib = get_attrib_from_keywords(kwds); | 
 | 333 |         if (!attrib) | 
 | 334 |             return -1; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 335 |     } | 
 | 336 |  | 
 | 337 |     self_elem = (ElementObject *)self; | 
 | 338 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 339 |     if (attrib != NULL && !is_empty_dict(attrib)) { | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 340 |         if (create_extra(self_elem, attrib) < 0) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 341 |             Py_DECREF(attrib); | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 342 |             return -1; | 
 | 343 |         } | 
 | 344 |     } | 
 | 345 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 346 |     /* We own a reference to attrib here and it's no longer needed. */ | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 347 |     Py_XDECREF(attrib); | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 348 |  | 
 | 349 |     /* Replace the objects already pointed to by tag, text and tail. */ | 
 | 350 |     tmp = self_elem->tag; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 351 |     Py_INCREF(tag); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 352 |     self_elem->tag = tag; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 353 |     Py_DECREF(tmp); | 
 | 354 |  | 
 | 355 |     tmp = self_elem->text; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 356 |     Py_INCREF(Py_None); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 357 |     self_elem->text = Py_None; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 358 |     Py_DECREF(JOIN_OBJ(tmp)); | 
 | 359 |  | 
 | 360 |     tmp = self_elem->tail; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 361 |     Py_INCREF(Py_None); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 362 |     self_elem->tail = Py_None; | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 363 |     Py_DECREF(JOIN_OBJ(tmp)); | 
 | 364 |  | 
 | 365 |     return 0; | 
 | 366 | } | 
 | 367 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 368 | LOCAL(int) | 
 | 369 | element_resize(ElementObject* self, int extra) | 
 | 370 | { | 
 | 371 |     int size; | 
 | 372 |     PyObject* *children; | 
 | 373 |  | 
 | 374 |     /* make sure self->children can hold the given number of extra | 
 | 375 |        elements.  set an exception and return -1 if allocation failed */ | 
 | 376 |  | 
 | 377 |     if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 378 |         create_extra(self, NULL); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 379 |  | 
 | 380 |     size = self->extra->length + extra; | 
 | 381 |  | 
 | 382 |     if (size > self->extra->allocated) { | 
 | 383 |         /* use Python 2.4's list growth strategy */ | 
 | 384 |         size = (size >> 3) + (size < 9 ? 3 : 6) + size; | 
| Christian Heimes | 679db4a | 2008-01-18 09:56:22 +0000 | [diff] [blame] | 385 |         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 386 |          * which needs at least 4 bytes. | 
 | 387 |          * Although it's a false alarm always assume at least one child to | 
| Christian Heimes | 679db4a | 2008-01-18 09:56:22 +0000 | [diff] [blame] | 388 |          * be safe. | 
 | 389 |          */ | 
 | 390 |         size = size ? size : 1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 391 |         if (self->extra->children != self->extra->_children) { | 
| Christian Heimes | 679db4a | 2008-01-18 09:56:22 +0000 | [diff] [blame] | 392 |             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 393 |              * "children", which needs at least 4 bytes. Although it's a | 
| Christian Heimes | 679db4a | 2008-01-18 09:56:22 +0000 | [diff] [blame] | 394 |              * false alarm always assume at least one child to be safe. | 
 | 395 |              */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 396 |             children = PyObject_Realloc(self->extra->children, | 
 | 397 |                                         size * sizeof(PyObject*)); | 
 | 398 |             if (!children) | 
 | 399 |                 goto nomemory; | 
 | 400 |         } else { | 
 | 401 |             children = PyObject_Malloc(size * sizeof(PyObject*)); | 
 | 402 |             if (!children) | 
 | 403 |                 goto nomemory; | 
 | 404 |             /* copy existing children from static area to malloc buffer */ | 
 | 405 |             memcpy(children, self->extra->children, | 
 | 406 |                    self->extra->length * sizeof(PyObject*)); | 
 | 407 |         } | 
 | 408 |         self->extra->children = children; | 
 | 409 |         self->extra->allocated = size; | 
 | 410 |     } | 
 | 411 |  | 
 | 412 |     return 0; | 
 | 413 |  | 
 | 414 |   nomemory: | 
 | 415 |     PyErr_NoMemory(); | 
 | 416 |     return -1; | 
 | 417 | } | 
 | 418 |  | 
 | 419 | LOCAL(int) | 
 | 420 | element_add_subelement(ElementObject* self, PyObject* element) | 
 | 421 | { | 
 | 422 |     /* add a child element to a parent */ | 
 | 423 |  | 
 | 424 |     if (element_resize(self, 1) < 0) | 
 | 425 |         return -1; | 
 | 426 |  | 
 | 427 |     Py_INCREF(element); | 
 | 428 |     self->extra->children[self->extra->length] = element; | 
 | 429 |  | 
 | 430 |     self->extra->length++; | 
 | 431 |  | 
 | 432 |     return 0; | 
 | 433 | } | 
 | 434 |  | 
 | 435 | LOCAL(PyObject*) | 
 | 436 | element_get_attrib(ElementObject* self) | 
 | 437 | { | 
 | 438 |     /* return borrowed reference to attrib dictionary */ | 
 | 439 |     /* note: this function assumes that the extra section exists */ | 
 | 440 |  | 
 | 441 |     PyObject* res = self->extra->attrib; | 
 | 442 |  | 
 | 443 |     if (res == Py_None) { | 
 | 444 |         /* create missing dictionary */ | 
 | 445 |         res = PyDict_New(); | 
 | 446 |         if (!res) | 
 | 447 |             return NULL; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 448 |         Py_DECREF(Py_None); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 449 |         self->extra->attrib = res; | 
 | 450 |     } | 
 | 451 |  | 
 | 452 |     return res; | 
 | 453 | } | 
 | 454 |  | 
 | 455 | LOCAL(PyObject*) | 
 | 456 | element_get_text(ElementObject* self) | 
 | 457 | { | 
 | 458 |     /* return borrowed reference to text attribute */ | 
 | 459 |  | 
 | 460 |     PyObject* res = self->text; | 
 | 461 |  | 
 | 462 |     if (JOIN_GET(res)) { | 
 | 463 |         res = JOIN_OBJ(res); | 
 | 464 |         if (PyList_CheckExact(res)) { | 
 | 465 |             res = list_join(res); | 
 | 466 |             if (!res) | 
 | 467 |                 return NULL; | 
 | 468 |             self->text = res; | 
 | 469 |         } | 
 | 470 |     } | 
 | 471 |  | 
 | 472 |     return res; | 
 | 473 | } | 
 | 474 |  | 
 | 475 | LOCAL(PyObject*) | 
 | 476 | element_get_tail(ElementObject* self) | 
 | 477 | { | 
 | 478 |     /* return borrowed reference to text attribute */ | 
 | 479 |  | 
 | 480 |     PyObject* res = self->tail; | 
 | 481 |  | 
 | 482 |     if (JOIN_GET(res)) { | 
 | 483 |         res = JOIN_OBJ(res); | 
 | 484 |         if (PyList_CheckExact(res)) { | 
 | 485 |             res = list_join(res); | 
 | 486 |             if (!res) | 
 | 487 |                 return NULL; | 
 | 488 |             self->tail = res; | 
 | 489 |         } | 
 | 490 |     } | 
 | 491 |  | 
 | 492 |     return res; | 
 | 493 | } | 
 | 494 |  | 
 | 495 | static PyObject* | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 496 | subelement(PyObject *self, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 497 | { | 
 | 498 |     PyObject* elem; | 
 | 499 |  | 
 | 500 |     ElementObject* parent; | 
 | 501 |     PyObject* tag; | 
 | 502 |     PyObject* attrib = NULL; | 
 | 503 |     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", | 
 | 504 |                           &Element_Type, &parent, &tag, | 
 | 505 |                           &PyDict_Type, &attrib)) | 
 | 506 |         return NULL; | 
 | 507 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 508 |     if (attrib) { | 
 | 509 |         /* attrib passed as positional arg */ | 
 | 510 |         attrib = PyDict_Copy(attrib); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 511 |         if (!attrib) | 
 | 512 |             return NULL; | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 513 |         if (kwds) { | 
 | 514 |             if (PyDict_Update(attrib, kwds) < 0) { | 
 | 515 |                 return NULL; | 
 | 516 |             } | 
 | 517 |         } | 
 | 518 |     } else if (kwds) { | 
 | 519 |         /* have keyword args */ | 
 | 520 |         attrib = get_attrib_from_keywords(kwds); | 
 | 521 |         if (!attrib) | 
 | 522 |             return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 523 |     } else { | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 524 |         /* no attrib arg, no kwds, so no attribute */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 525 |         Py_INCREF(Py_None); | 
 | 526 |         attrib = Py_None; | 
 | 527 |     } | 
 | 528 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 529 |     elem = create_new_element(tag, attrib); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 530 |  | 
 | 531 |     Py_DECREF(attrib); | 
 | 532 |  | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 533 |     if (element_add_subelement(parent, elem) < 0) { | 
 | 534 |         Py_DECREF(elem); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 535 |         return NULL; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 536 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 537 |  | 
 | 538 |     return elem; | 
 | 539 | } | 
 | 540 |  | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 541 | static int | 
 | 542 | element_gc_traverse(ElementObject *self, visitproc visit, void *arg) | 
 | 543 | { | 
 | 544 |     Py_VISIT(self->tag); | 
 | 545 |     Py_VISIT(JOIN_OBJ(self->text)); | 
 | 546 |     Py_VISIT(JOIN_OBJ(self->tail)); | 
 | 547 |  | 
 | 548 |     if (self->extra) { | 
 | 549 |         int i; | 
 | 550 |         Py_VISIT(self->extra->attrib); | 
 | 551 |  | 
 | 552 |         for (i = 0; i < self->extra->length; ++i) | 
 | 553 |             Py_VISIT(self->extra->children[i]); | 
 | 554 |     } | 
 | 555 |     return 0; | 
 | 556 | } | 
 | 557 |  | 
 | 558 | static int | 
 | 559 | element_gc_clear(ElementObject *self) | 
 | 560 | { | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 561 |     Py_CLEAR(self->tag); | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 562 |  | 
 | 563 |     /* The following is like Py_CLEAR for self->text and self->tail, but | 
 | 564 |      * written explicitily because the real pointers hide behind access | 
 | 565 |      * macros. | 
 | 566 |     */ | 
 | 567 |     if (self->text) { | 
 | 568 |         PyObject *tmp = JOIN_OBJ(self->text); | 
 | 569 |         self->text = NULL; | 
 | 570 |         Py_DECREF(tmp); | 
 | 571 |     } | 
 | 572 |  | 
 | 573 |     if (self->tail) { | 
 | 574 |         PyObject *tmp = JOIN_OBJ(self->tail); | 
 | 575 |         self->tail = NULL; | 
 | 576 |         Py_DECREF(tmp); | 
 | 577 |     } | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 578 |  | 
 | 579 |     /* After dropping all references from extra, it's no longer valid anyway, | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 580 |      * so fully deallocate it. | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 581 |     */ | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 582 |     dealloc_extra(self); | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 583 |     return 0; | 
 | 584 | } | 
 | 585 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 586 | static void | 
 | 587 | element_dealloc(ElementObject* self) | 
 | 588 | { | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 589 |     PyObject_GC_UnTrack(self); | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 590 |  | 
 | 591 |     if (self->weakreflist != NULL) | 
 | 592 |         PyObject_ClearWeakRefs((PyObject *) self); | 
 | 593 |  | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 594 |     /* element_gc_clear clears all references and deallocates extra | 
 | 595 |     */ | 
 | 596 |     element_gc_clear(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 597 |  | 
 | 598 |     RELEASE(sizeof(ElementObject), "destroy element"); | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 599 |     Py_TYPE(self)->tp_free((PyObject *)self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 600 | } | 
 | 601 |  | 
 | 602 | /* -------------------------------------------------------------------- */ | 
 | 603 | /* methods (in alphabetical order) */ | 
 | 604 |  | 
 | 605 | static PyObject* | 
 | 606 | element_append(ElementObject* self, PyObject* args) | 
 | 607 | { | 
 | 608 |     PyObject* element; | 
 | 609 |     if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) | 
 | 610 |         return NULL; | 
 | 611 |  | 
 | 612 |     if (element_add_subelement(self, element) < 0) | 
 | 613 |         return NULL; | 
 | 614 |  | 
 | 615 |     Py_RETURN_NONE; | 
 | 616 | } | 
 | 617 |  | 
 | 618 | static PyObject* | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 619 | element_clearmethod(ElementObject* self, PyObject* args) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 620 | { | 
 | 621 |     if (!PyArg_ParseTuple(args, ":clear")) | 
 | 622 |         return NULL; | 
 | 623 |  | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 624 |     dealloc_extra(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 625 |  | 
 | 626 |     Py_INCREF(Py_None); | 
 | 627 |     Py_DECREF(JOIN_OBJ(self->text)); | 
 | 628 |     self->text = Py_None; | 
 | 629 |  | 
 | 630 |     Py_INCREF(Py_None); | 
 | 631 |     Py_DECREF(JOIN_OBJ(self->tail)); | 
 | 632 |     self->tail = Py_None; | 
 | 633 |  | 
 | 634 |     Py_RETURN_NONE; | 
 | 635 | } | 
 | 636 |  | 
 | 637 | static PyObject* | 
 | 638 | element_copy(ElementObject* self, PyObject* args) | 
 | 639 | { | 
 | 640 |     int i; | 
 | 641 |     ElementObject* element; | 
 | 642 |  | 
 | 643 |     if (!PyArg_ParseTuple(args, ":__copy__")) | 
 | 644 |         return NULL; | 
 | 645 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 646 |     element = (ElementObject*) create_new_element( | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 647 |         self->tag, (self->extra) ? self->extra->attrib : Py_None | 
 | 648 |         ); | 
 | 649 |     if (!element) | 
 | 650 |         return NULL; | 
 | 651 |  | 
 | 652 |     Py_DECREF(JOIN_OBJ(element->text)); | 
 | 653 |     element->text = self->text; | 
 | 654 |     Py_INCREF(JOIN_OBJ(element->text)); | 
 | 655 |  | 
 | 656 |     Py_DECREF(JOIN_OBJ(element->tail)); | 
 | 657 |     element->tail = self->tail; | 
 | 658 |     Py_INCREF(JOIN_OBJ(element->tail)); | 
 | 659 |  | 
 | 660 |     if (self->extra) { | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 661 |  | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 662 |         if (element_resize(element, self->extra->length) < 0) { | 
 | 663 |             Py_DECREF(element); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 664 |             return NULL; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 665 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 666 |  | 
 | 667 |         for (i = 0; i < self->extra->length; i++) { | 
 | 668 |             Py_INCREF(self->extra->children[i]); | 
 | 669 |             element->extra->children[i] = self->extra->children[i]; | 
 | 670 |         } | 
 | 671 |  | 
 | 672 |         element->extra->length = self->extra->length; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 673 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 674 |     } | 
 | 675 |  | 
 | 676 |     return (PyObject*) element; | 
 | 677 | } | 
 | 678 |  | 
 | 679 | static PyObject* | 
 | 680 | element_deepcopy(ElementObject* self, PyObject* args) | 
 | 681 | { | 
 | 682 |     int i; | 
 | 683 |     ElementObject* element; | 
 | 684 |     PyObject* tag; | 
 | 685 |     PyObject* attrib; | 
 | 686 |     PyObject* text; | 
 | 687 |     PyObject* tail; | 
 | 688 |     PyObject* id; | 
 | 689 |  | 
 | 690 |     PyObject* memo; | 
 | 691 |     if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) | 
 | 692 |         return NULL; | 
 | 693 |  | 
 | 694 |     tag = deepcopy(self->tag, memo); | 
 | 695 |     if (!tag) | 
 | 696 |         return NULL; | 
 | 697 |  | 
 | 698 |     if (self->extra) { | 
 | 699 |         attrib = deepcopy(self->extra->attrib, memo); | 
 | 700 |         if (!attrib) { | 
 | 701 |             Py_DECREF(tag); | 
 | 702 |             return NULL; | 
 | 703 |         } | 
 | 704 |     } else { | 
 | 705 |         Py_INCREF(Py_None); | 
 | 706 |         attrib = Py_None; | 
 | 707 |     } | 
 | 708 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 709 |     element = (ElementObject*) create_new_element(tag, attrib); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 710 |  | 
 | 711 |     Py_DECREF(tag); | 
 | 712 |     Py_DECREF(attrib); | 
 | 713 |  | 
 | 714 |     if (!element) | 
 | 715 |         return NULL; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 716 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 717 |     text = deepcopy(JOIN_OBJ(self->text), memo); | 
 | 718 |     if (!text) | 
 | 719 |         goto error; | 
 | 720 |     Py_DECREF(element->text); | 
 | 721 |     element->text = JOIN_SET(text, JOIN_GET(self->text)); | 
 | 722 |  | 
 | 723 |     tail = deepcopy(JOIN_OBJ(self->tail), memo); | 
 | 724 |     if (!tail) | 
 | 725 |         goto error; | 
 | 726 |     Py_DECREF(element->tail); | 
 | 727 |     element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); | 
 | 728 |  | 
 | 729 |     if (self->extra) { | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 730 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 731 |         if (element_resize(element, self->extra->length) < 0) | 
 | 732 |             goto error; | 
 | 733 |  | 
 | 734 |         for (i = 0; i < self->extra->length; i++) { | 
 | 735 |             PyObject* child = deepcopy(self->extra->children[i], memo); | 
 | 736 |             if (!child) { | 
 | 737 |                 element->extra->length = i; | 
 | 738 |                 goto error; | 
 | 739 |             } | 
 | 740 |             element->extra->children[i] = child; | 
 | 741 |         } | 
 | 742 |  | 
 | 743 |         element->extra->length = self->extra->length; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 744 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 745 |     } | 
 | 746 |  | 
 | 747 |     /* add object to memo dictionary (so deepcopy won't visit it again) */ | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 748 |     id = PyLong_FromSsize_t((Py_uintptr_t) self); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 749 |     if (!id) | 
 | 750 |         goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 751 |  | 
 | 752 |     i = PyDict_SetItem(memo, id, (PyObject*) element); | 
 | 753 |  | 
 | 754 |     Py_DECREF(id); | 
 | 755 |  | 
 | 756 |     if (i < 0) | 
 | 757 |         goto error; | 
 | 758 |  | 
 | 759 |     return (PyObject*) element; | 
 | 760 |  | 
 | 761 |   error: | 
 | 762 |     Py_DECREF(element); | 
 | 763 |     return NULL; | 
 | 764 | } | 
 | 765 |  | 
| Martin v. Löwis | bce1666 | 2012-06-17 10:41:22 +0200 | [diff] [blame] | 766 | static PyObject* | 
 | 767 | element_sizeof(PyObject* _self, PyObject* args) | 
 | 768 | { | 
 | 769 |     ElementObject *self = (ElementObject*)_self; | 
 | 770 |     Py_ssize_t result = sizeof(ElementObject); | 
 | 771 |     if (self->extra) { | 
 | 772 |         result += sizeof(ElementObjectExtra); | 
 | 773 |         if (self->extra->children != self->extra->_children) | 
 | 774 |             result += sizeof(PyObject*) * self->extra->allocated; | 
 | 775 |     } | 
 | 776 |     return PyLong_FromSsize_t(result); | 
 | 777 | } | 
 | 778 |  | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 779 | /* dict keys for getstate/setstate. */ | 
 | 780 | #define PICKLED_TAG "tag" | 
 | 781 | #define PICKLED_CHILDREN "_children" | 
 | 782 | #define PICKLED_ATTRIB "attrib" | 
 | 783 | #define PICKLED_TAIL "tail" | 
 | 784 | #define PICKLED_TEXT "text" | 
 | 785 |  | 
 | 786 | /* __getstate__ returns a fabricated instance dict as in the pure-Python | 
 | 787 |  * Element implementation, for interoperability/interchangeability.  This | 
 | 788 |  * makes the pure-Python implementation details an API, but (a) there aren't | 
 | 789 |  * any unnecessary structures there; and (b) it buys compatibility with 3.2 | 
 | 790 |  * pickles.  See issue #16076. | 
 | 791 |  */ | 
 | 792 | static PyObject * | 
 | 793 | element_getstate(ElementObject *self) | 
 | 794 | { | 
 | 795 |     int i, noattrib; | 
 | 796 |     PyObject *instancedict = NULL, *children; | 
 | 797 |  | 
 | 798 |     /* Build a list of children. */ | 
 | 799 |     children = PyList_New(self->extra ? self->extra->length : 0); | 
 | 800 |     if (!children) | 
 | 801 |         return NULL; | 
 | 802 |     for (i = 0; i < PyList_GET_SIZE(children); i++) { | 
 | 803 |         PyObject *child = self->extra->children[i]; | 
 | 804 |         Py_INCREF(child); | 
 | 805 |         PyList_SET_ITEM(children, i, child); | 
 | 806 |     } | 
 | 807 |  | 
 | 808 |     /* Construct the state object. */ | 
 | 809 |     noattrib = (self->extra == NULL || self->extra->attrib == Py_None); | 
 | 810 |     if (noattrib) | 
 | 811 |         instancedict = Py_BuildValue("{sOsOs{}sOsO}", | 
 | 812 |                                      PICKLED_TAG, self->tag, | 
 | 813 |                                      PICKLED_CHILDREN, children, | 
 | 814 |                                      PICKLED_ATTRIB, | 
 | 815 |                                      PICKLED_TEXT, self->text, | 
 | 816 |                                      PICKLED_TAIL, self->tail); | 
 | 817 |     else | 
 | 818 |         instancedict = Py_BuildValue("{sOsOsOsOsO}", | 
 | 819 |                                      PICKLED_TAG, self->tag, | 
 | 820 |                                      PICKLED_CHILDREN, children, | 
 | 821 |                                      PICKLED_ATTRIB, self->extra->attrib, | 
 | 822 |                                      PICKLED_TEXT, self->text, | 
 | 823 |                                      PICKLED_TAIL, self->tail); | 
| Eli Bendersky | b8f6dc8 | 2013-01-12 05:20:16 -0800 | [diff] [blame] | 824 |     if (instancedict) { | 
 | 825 |         Py_DECREF(children); | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 826 |         return instancedict; | 
| Eli Bendersky | b8f6dc8 | 2013-01-12 05:20:16 -0800 | [diff] [blame] | 827 |     } | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 828 |     else { | 
 | 829 |         for (i = 0; i < PyList_GET_SIZE(children); i++) | 
 | 830 |             Py_DECREF(PyList_GET_ITEM(children, i)); | 
 | 831 |         Py_DECREF(children); | 
 | 832 |  | 
 | 833 |         return NULL; | 
 | 834 |     } | 
 | 835 | } | 
 | 836 |  | 
 | 837 | static PyObject * | 
 | 838 | element_setstate_from_attributes(ElementObject *self, | 
 | 839 |                                  PyObject *tag, | 
 | 840 |                                  PyObject *attrib, | 
 | 841 |                                  PyObject *text, | 
 | 842 |                                  PyObject *tail, | 
 | 843 |                                  PyObject *children) | 
 | 844 | { | 
 | 845 |     Py_ssize_t i, nchildren; | 
 | 846 |  | 
 | 847 |     if (!tag) { | 
 | 848 |         PyErr_SetString(PyExc_TypeError, "tag may not be NULL"); | 
 | 849 |         return NULL; | 
 | 850 |     } | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 851 |  | 
 | 852 |     Py_CLEAR(self->tag); | 
 | 853 |     self->tag = tag; | 
 | 854 |     Py_INCREF(self->tag); | 
 | 855 |  | 
 | 856 |     Py_CLEAR(self->text); | 
| Eli Bendersky | b8f6dc8 | 2013-01-12 05:20:16 -0800 | [diff] [blame] | 857 |     self->text = text ? text : Py_None; | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 858 |     Py_INCREF(self->text); | 
 | 859 |  | 
 | 860 |     Py_CLEAR(self->tail); | 
| Eli Bendersky | b8f6dc8 | 2013-01-12 05:20:16 -0800 | [diff] [blame] | 861 |     self->tail = tail ? tail : Py_None; | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 862 |     Py_INCREF(self->tail); | 
 | 863 |  | 
 | 864 |     /* Handle ATTRIB and CHILDREN. */ | 
 | 865 |     if (!children && !attrib) | 
 | 866 |         Py_RETURN_NONE; | 
 | 867 |  | 
 | 868 |     /* Compute 'nchildren'. */ | 
 | 869 |     if (children) { | 
 | 870 |         if (!PyList_Check(children)) { | 
 | 871 |             PyErr_SetString(PyExc_TypeError, "'_children' is not a list"); | 
 | 872 |             return NULL; | 
 | 873 |         } | 
 | 874 |         nchildren = PyList_Size(children); | 
 | 875 |     } | 
 | 876 |     else { | 
 | 877 |         nchildren = 0; | 
 | 878 |     } | 
 | 879 |  | 
 | 880 |     /* Allocate 'extra'. */ | 
 | 881 |     if (element_resize(self, nchildren)) { | 
 | 882 |         return NULL; | 
 | 883 |     } | 
 | 884 |     assert(self->extra && self->extra->allocated >= nchildren); | 
 | 885 |  | 
 | 886 |     /* Copy children */ | 
 | 887 |     for (i = 0; i < nchildren; i++) { | 
 | 888 |         self->extra->children[i] = PyList_GET_ITEM(children, i); | 
 | 889 |         Py_INCREF(self->extra->children[i]); | 
 | 890 |     } | 
 | 891 |  | 
 | 892 |     self->extra->length = nchildren; | 
 | 893 |     self->extra->allocated = nchildren; | 
 | 894 |  | 
 | 895 |     /* Stash attrib. */ | 
 | 896 |     if (attrib) { | 
 | 897 |         Py_CLEAR(self->extra->attrib); | 
 | 898 |         self->extra->attrib = attrib; | 
 | 899 |         Py_INCREF(attrib); | 
 | 900 |     } | 
 | 901 |  | 
 | 902 |     Py_RETURN_NONE; | 
 | 903 | } | 
 | 904 |  | 
 | 905 | /* __setstate__ for Element instance from the Python implementation. | 
 | 906 |  * 'state' should be the instance dict. | 
 | 907 |  */ | 
 | 908 | static PyObject * | 
 | 909 | element_setstate_from_Python(ElementObject *self, PyObject *state) | 
 | 910 | { | 
 | 911 |     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT, | 
 | 912 |                              PICKLED_TAIL, PICKLED_CHILDREN, 0}; | 
 | 913 |     PyObject *args; | 
 | 914 |     PyObject *tag, *attrib, *text, *tail, *children; | 
| Eli Bendersky | 799e3ed | 2013-01-12 05:42:38 -0800 | [diff] [blame] | 915 |     PyObject *retval; | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 916 |  | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 917 |     tag = attrib = text = tail = children = NULL; | 
 | 918 |     args = PyTuple_New(0); | 
| Eli Bendersky | 799e3ed | 2013-01-12 05:42:38 -0800 | [diff] [blame] | 919 |     if (!args) | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 920 |         return NULL; | 
| Eli Bendersky | 799e3ed | 2013-01-12 05:42:38 -0800 | [diff] [blame] | 921 |  | 
 | 922 |     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag, | 
 | 923 |                                     &attrib, &text, &tail, &children)) | 
 | 924 |         retval = element_setstate_from_attributes(self, tag, attrib, text, | 
 | 925 |                                                   tail, children); | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 926 |     else | 
| Eli Bendersky | 799e3ed | 2013-01-12 05:42:38 -0800 | [diff] [blame] | 927 |         retval = NULL; | 
 | 928 |  | 
 | 929 |     Py_DECREF(args); | 
 | 930 |     return retval; | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 931 | } | 
 | 932 |  | 
 | 933 | static PyObject * | 
 | 934 | element_setstate(ElementObject *self, PyObject *state) | 
 | 935 | { | 
 | 936 |     if (!PyDict_CheckExact(state)) { | 
 | 937 |         PyErr_Format(PyExc_TypeError, | 
 | 938 |                      "Don't know how to unpickle \"%.200R\" as an Element", | 
 | 939 |                      state); | 
 | 940 |         return NULL; | 
 | 941 |     } | 
 | 942 |     else | 
 | 943 |         return element_setstate_from_Python(self, state); | 
 | 944 | } | 
 | 945 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 946 | LOCAL(int) | 
 | 947 | checkpath(PyObject* tag) | 
 | 948 | { | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 949 |     Py_ssize_t i; | 
 | 950 |     int check = 1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 951 |  | 
 | 952 |     /* check if a tag contains an xpath character */ | 
 | 953 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 954 | #define PATHCHAR(ch) \ | 
 | 955 |     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 956 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 957 |     if (PyUnicode_Check(tag)) { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 958 |         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); | 
 | 959 |         void *data = PyUnicode_DATA(tag); | 
 | 960 |         unsigned int kind = PyUnicode_KIND(tag); | 
 | 961 |         for (i = 0; i < len; i++) { | 
 | 962 |             Py_UCS4 ch = PyUnicode_READ(kind, data, i); | 
 | 963 |             if (ch == '{') | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 964 |                 check = 0; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 965 |             else if (ch == '}') | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 966 |                 check = 1; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 967 |             else if (check && PATHCHAR(ch)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 968 |                 return 1; | 
 | 969 |         } | 
 | 970 |         return 0; | 
 | 971 |     } | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 972 |     if (PyBytes_Check(tag)) { | 
 | 973 |         char *p = PyBytes_AS_STRING(tag); | 
 | 974 |         for (i = 0; i < PyBytes_GET_SIZE(tag); i++) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 975 |             if (p[i] == '{') | 
 | 976 |                 check = 0; | 
 | 977 |             else if (p[i] == '}') | 
 | 978 |                 check = 1; | 
 | 979 |             else if (check && PATHCHAR(p[i])) | 
 | 980 |                 return 1; | 
 | 981 |         } | 
 | 982 |         return 0; | 
 | 983 |     } | 
 | 984 |  | 
 | 985 |     return 1; /* unknown type; might be path expression */ | 
 | 986 | } | 
 | 987 |  | 
 | 988 | static PyObject* | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 989 | element_extend(ElementObject* self, PyObject* args) | 
 | 990 | { | 
 | 991 |     PyObject* seq; | 
 | 992 |     Py_ssize_t i, seqlen = 0; | 
 | 993 |  | 
 | 994 |     PyObject* seq_in; | 
 | 995 |     if (!PyArg_ParseTuple(args, "O:extend", &seq_in)) | 
 | 996 |         return NULL; | 
 | 997 |  | 
 | 998 |     seq = PySequence_Fast(seq_in, ""); | 
 | 999 |     if (!seq) { | 
 | 1000 |         PyErr_Format( | 
 | 1001 |             PyExc_TypeError, | 
 | 1002 |             "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name | 
 | 1003 |             ); | 
 | 1004 |         return NULL; | 
 | 1005 |     } | 
 | 1006 |  | 
 | 1007 |     seqlen = PySequence_Size(seq); | 
 | 1008 |     for (i = 0; i < seqlen; i++) { | 
 | 1009 |         PyObject* element = PySequence_Fast_GET_ITEM(seq, i); | 
| Eli Bendersky | 396e8fc | 2012-03-23 14:24:20 +0200 | [diff] [blame] | 1010 |         if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) { | 
 | 1011 |             Py_DECREF(seq); | 
 | 1012 |             PyErr_Format( | 
 | 1013 |                 PyExc_TypeError, | 
 | 1014 |                 "expected an Element, not \"%.200s\"", | 
 | 1015 |                 Py_TYPE(element)->tp_name); | 
 | 1016 |             return NULL; | 
 | 1017 |         } | 
 | 1018 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1019 |         if (element_add_subelement(self, element) < 0) { | 
 | 1020 |             Py_DECREF(seq); | 
 | 1021 |             return NULL; | 
 | 1022 |         } | 
 | 1023 |     } | 
 | 1024 |  | 
 | 1025 |     Py_DECREF(seq); | 
 | 1026 |  | 
 | 1027 |     Py_RETURN_NONE; | 
 | 1028 | } | 
 | 1029 |  | 
 | 1030 | static PyObject* | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1031 | element_find(ElementObject *self, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1032 | { | 
 | 1033 |     int i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1034 |     PyObject* tag; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1035 |     PyObject* namespaces = Py_None; | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1036 |     static char *kwlist[] = {"path", "namespaces", 0}; | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1037 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1038 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist, | 
 | 1039 |                                      &tag, &namespaces)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1040 |         return NULL; | 
 | 1041 |  | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1042 |     if (checkpath(tag) || namespaces != Py_None) { | 
| Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 1043 |         _Py_IDENTIFIER(find); | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1044 |         return _PyObject_CallMethodId( | 
 | 1045 |             elementpath_obj, &PyId_find, "OOO", self, tag, namespaces | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1046 |             ); | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1047 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1048 |  | 
 | 1049 |     if (!self->extra) | 
 | 1050 |         Py_RETURN_NONE; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 1051 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1052 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1053 |         PyObject* item = self->extra->children[i]; | 
 | 1054 |         if (Element_CheckExact(item) && | 
| Mark Dickinson | 211c625 | 2009-02-01 10:28:51 +0000 | [diff] [blame] | 1055 |             PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1056 |             Py_INCREF(item); | 
 | 1057 |             return item; | 
 | 1058 |         } | 
 | 1059 |     } | 
 | 1060 |  | 
 | 1061 |     Py_RETURN_NONE; | 
 | 1062 | } | 
 | 1063 |  | 
 | 1064 | static PyObject* | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1065 | element_findtext(ElementObject *self, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1066 | { | 
 | 1067 |     int i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1068 |     PyObject* tag; | 
 | 1069 |     PyObject* default_value = Py_None; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1070 |     PyObject* namespaces = Py_None; | 
| Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 1071 |     _Py_IDENTIFIER(findtext); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1072 |     static char *kwlist[] = {"path", "default", "namespaces", 0}; | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1073 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1074 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist, | 
 | 1075 |                                      &tag, &default_value, &namespaces)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1076 |         return NULL; | 
 | 1077 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1078 |     if (checkpath(tag) || namespaces != Py_None) | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1079 |         return _PyObject_CallMethodId( | 
 | 1080 |             elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1081 |             ); | 
 | 1082 |  | 
 | 1083 |     if (!self->extra) { | 
 | 1084 |         Py_INCREF(default_value); | 
 | 1085 |         return default_value; | 
 | 1086 |     } | 
 | 1087 |  | 
 | 1088 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1089 |         ElementObject* item = (ElementObject*) self->extra->children[i]; | 
| Mark Dickinson | 211c625 | 2009-02-01 10:28:51 +0000 | [diff] [blame] | 1090 |         if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) { | 
 | 1091 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1092 |             PyObject* text = element_get_text(item); | 
 | 1093 |             if (text == Py_None) | 
| Eli Bendersky | 25771b3 | 2013-01-13 05:26:07 -0800 | [diff] [blame] | 1094 |                 return PyUnicode_New(0, 0); | 
| Thomas Wouters | 00ee7ba | 2006-08-21 19:07:27 +0000 | [diff] [blame] | 1095 |             Py_XINCREF(text); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1096 |             return text; | 
 | 1097 |         } | 
 | 1098 |     } | 
 | 1099 |  | 
 | 1100 |     Py_INCREF(default_value); | 
 | 1101 |     return default_value; | 
 | 1102 | } | 
 | 1103 |  | 
 | 1104 | static PyObject* | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1105 | element_findall(ElementObject *self, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1106 | { | 
 | 1107 |     int i; | 
 | 1108 |     PyObject* out; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1109 |     PyObject* tag; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1110 |     PyObject* namespaces = Py_None; | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1111 |     static char *kwlist[] = {"path", "namespaces", 0}; | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1112 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1113 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist, | 
 | 1114 |                                      &tag, &namespaces)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1115 |         return NULL; | 
 | 1116 |  | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1117 |     if (checkpath(tag) || namespaces != Py_None) { | 
| Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 1118 |         _Py_IDENTIFIER(findall); | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1119 |         return _PyObject_CallMethodId( | 
 | 1120 |             elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1121 |             ); | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1122 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1123 |  | 
 | 1124 |     out = PyList_New(0); | 
 | 1125 |     if (!out) | 
 | 1126 |         return NULL; | 
 | 1127 |  | 
 | 1128 |     if (!self->extra) | 
 | 1129 |         return out; | 
 | 1130 |  | 
 | 1131 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1132 |         PyObject* item = self->extra->children[i]; | 
 | 1133 |         if (Element_CheckExact(item) && | 
| Mark Dickinson | 211c625 | 2009-02-01 10:28:51 +0000 | [diff] [blame] | 1134 |             PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1135 |             if (PyList_Append(out, item) < 0) { | 
 | 1136 |                 Py_DECREF(out); | 
 | 1137 |                 return NULL; | 
 | 1138 |             } | 
 | 1139 |         } | 
 | 1140 |     } | 
 | 1141 |  | 
 | 1142 |     return out; | 
 | 1143 | } | 
 | 1144 |  | 
 | 1145 | static PyObject* | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1146 | element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds) | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1147 | { | 
 | 1148 |     PyObject* tag; | 
 | 1149 |     PyObject* namespaces = Py_None; | 
| Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 1150 |     _Py_IDENTIFIER(iterfind); | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1151 |     static char *kwlist[] = {"path", "namespaces", 0}; | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1152 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1153 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist, | 
 | 1154 |                                      &tag, &namespaces)) | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1155 |         return NULL; | 
 | 1156 |  | 
| Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 1157 |     return _PyObject_CallMethodId( | 
 | 1158 |         elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1159 |         ); | 
 | 1160 | } | 
 | 1161 |  | 
 | 1162 | static PyObject* | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1163 | element_get(ElementObject* self, PyObject* args, PyObject* kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1164 | { | 
 | 1165 |     PyObject* value; | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1166 |     static char* kwlist[] = {"key", "default", 0}; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1167 |  | 
 | 1168 |     PyObject* key; | 
 | 1169 |     PyObject* default_value = Py_None; | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1170 |  | 
 | 1171 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key, | 
 | 1172 |                                      &default_value)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1173 |         return NULL; | 
 | 1174 |  | 
 | 1175 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 1176 |         value = default_value; | 
 | 1177 |     else { | 
 | 1178 |         value = PyDict_GetItem(self->extra->attrib, key); | 
 | 1179 |         if (!value) | 
 | 1180 |             value = default_value; | 
 | 1181 |     } | 
 | 1182 |  | 
 | 1183 |     Py_INCREF(value); | 
 | 1184 |     return value; | 
 | 1185 | } | 
 | 1186 |  | 
 | 1187 | static PyObject* | 
 | 1188 | element_getchildren(ElementObject* self, PyObject* args) | 
 | 1189 | { | 
 | 1190 |     int i; | 
 | 1191 |     PyObject* list; | 
 | 1192 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1193 |     /* FIXME: report as deprecated? */ | 
 | 1194 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1195 |     if (!PyArg_ParseTuple(args, ":getchildren")) | 
 | 1196 |         return NULL; | 
 | 1197 |  | 
 | 1198 |     if (!self->extra) | 
 | 1199 |         return PyList_New(0); | 
 | 1200 |  | 
 | 1201 |     list = PyList_New(self->extra->length); | 
 | 1202 |     if (!list) | 
 | 1203 |         return NULL; | 
 | 1204 |  | 
 | 1205 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1206 |         PyObject* item = self->extra->children[i]; | 
 | 1207 |         Py_INCREF(item); | 
 | 1208 |         PyList_SET_ITEM(list, i, item); | 
 | 1209 |     } | 
 | 1210 |  | 
 | 1211 |     return list; | 
 | 1212 | } | 
 | 1213 |  | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 1214 |  | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1215 | static PyObject * | 
 | 1216 | create_elementiter(ElementObject *self, PyObject *tag, int gettext); | 
 | 1217 |  | 
 | 1218 |  | 
 | 1219 | static PyObject * | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1220 | element_iter(ElementObject *self, PyObject *args, PyObject *kwds) | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1221 | { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1222 |     PyObject* tag = Py_None; | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1223 |     static char* kwlist[] = {"tag", 0}; | 
 | 1224 |  | 
 | 1225 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag)) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1226 |         return NULL; | 
 | 1227 |  | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1228 |     return create_elementiter(self, tag, 0); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1229 | } | 
 | 1230 |  | 
 | 1231 |  | 
 | 1232 | static PyObject* | 
 | 1233 | element_itertext(ElementObject* self, PyObject* args) | 
 | 1234 | { | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1235 |     if (!PyArg_ParseTuple(args, ":itertext")) | 
 | 1236 |         return NULL; | 
 | 1237 |  | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1238 |     return create_elementiter(self, Py_None, 1); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1239 | } | 
 | 1240 |  | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1241 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1242 | static PyObject* | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1243 | element_getitem(PyObject* self_, Py_ssize_t index) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1244 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1245 |     ElementObject* self = (ElementObject*) self_; | 
 | 1246 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1247 |     if (!self->extra || index < 0 || index >= self->extra->length) { | 
 | 1248 |         PyErr_SetString( | 
 | 1249 |             PyExc_IndexError, | 
 | 1250 |             "child index out of range" | 
 | 1251 |             ); | 
 | 1252 |         return NULL; | 
 | 1253 |     } | 
 | 1254 |  | 
 | 1255 |     Py_INCREF(self->extra->children[index]); | 
 | 1256 |     return self->extra->children[index]; | 
 | 1257 | } | 
 | 1258 |  | 
 | 1259 | static PyObject* | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1260 | element_insert(ElementObject* self, PyObject* args) | 
 | 1261 | { | 
 | 1262 |     int i; | 
 | 1263 |  | 
 | 1264 |     int index; | 
 | 1265 |     PyObject* element; | 
 | 1266 |     if (!PyArg_ParseTuple(args, "iO!:insert", &index, | 
 | 1267 |                           &Element_Type, &element)) | 
 | 1268 |         return NULL; | 
 | 1269 |  | 
 | 1270 |     if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1271 |         create_extra(self, NULL); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1272 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1273 |     if (index < 0) { | 
 | 1274 |         index += self->extra->length; | 
 | 1275 |         if (index < 0) | 
 | 1276 |             index = 0; | 
 | 1277 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1278 |     if (index > self->extra->length) | 
 | 1279 |         index = self->extra->length; | 
 | 1280 |  | 
 | 1281 |     if (element_resize(self, 1) < 0) | 
 | 1282 |         return NULL; | 
 | 1283 |  | 
 | 1284 |     for (i = self->extra->length; i > index; i--) | 
 | 1285 |         self->extra->children[i] = self->extra->children[i-1]; | 
 | 1286 |  | 
 | 1287 |     Py_INCREF(element); | 
 | 1288 |     self->extra->children[index] = element; | 
 | 1289 |  | 
 | 1290 |     self->extra->length++; | 
 | 1291 |  | 
 | 1292 |     Py_RETURN_NONE; | 
 | 1293 | } | 
 | 1294 |  | 
 | 1295 | static PyObject* | 
 | 1296 | element_items(ElementObject* self, PyObject* args) | 
 | 1297 | { | 
 | 1298 |     if (!PyArg_ParseTuple(args, ":items")) | 
 | 1299 |         return NULL; | 
 | 1300 |  | 
 | 1301 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 1302 |         return PyList_New(0); | 
 | 1303 |  | 
 | 1304 |     return PyDict_Items(self->extra->attrib); | 
 | 1305 | } | 
 | 1306 |  | 
 | 1307 | static PyObject* | 
 | 1308 | element_keys(ElementObject* self, PyObject* args) | 
 | 1309 | { | 
 | 1310 |     if (!PyArg_ParseTuple(args, ":keys")) | 
 | 1311 |         return NULL; | 
 | 1312 |  | 
 | 1313 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 1314 |         return PyList_New(0); | 
 | 1315 |  | 
 | 1316 |     return PyDict_Keys(self->extra->attrib); | 
 | 1317 | } | 
 | 1318 |  | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 1319 | static Py_ssize_t | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1320 | element_length(ElementObject* self) | 
 | 1321 | { | 
 | 1322 |     if (!self->extra) | 
 | 1323 |         return 0; | 
 | 1324 |  | 
 | 1325 |     return self->extra->length; | 
 | 1326 | } | 
 | 1327 |  | 
 | 1328 | static PyObject* | 
 | 1329 | element_makeelement(PyObject* self, PyObject* args, PyObject* kw) | 
 | 1330 | { | 
 | 1331 |     PyObject* elem; | 
 | 1332 |  | 
 | 1333 |     PyObject* tag; | 
 | 1334 |     PyObject* attrib; | 
 | 1335 |     if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) | 
 | 1336 |         return NULL; | 
 | 1337 |  | 
 | 1338 |     attrib = PyDict_Copy(attrib); | 
 | 1339 |     if (!attrib) | 
 | 1340 |         return NULL; | 
 | 1341 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1342 |     elem = create_new_element(tag, attrib); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1343 |  | 
 | 1344 |     Py_DECREF(attrib); | 
 | 1345 |  | 
 | 1346 |     return elem; | 
 | 1347 | } | 
 | 1348 |  | 
 | 1349 | static PyObject* | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1350 | element_remove(ElementObject* self, PyObject* args) | 
 | 1351 | { | 
 | 1352 |     int i; | 
 | 1353 |  | 
 | 1354 |     PyObject* element; | 
 | 1355 |     if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) | 
 | 1356 |         return NULL; | 
 | 1357 |  | 
 | 1358 |     if (!self->extra) { | 
 | 1359 |         /* element has no children, so raise exception */ | 
 | 1360 |         PyErr_SetString( | 
 | 1361 |             PyExc_ValueError, | 
 | 1362 |             "list.remove(x): x not in list" | 
 | 1363 |             ); | 
 | 1364 |         return NULL; | 
 | 1365 |     } | 
 | 1366 |  | 
 | 1367 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1368 |         if (self->extra->children[i] == element) | 
 | 1369 |             break; | 
| Mark Dickinson | 211c625 | 2009-02-01 10:28:51 +0000 | [diff] [blame] | 1370 |         if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1371 |             break; | 
 | 1372 |     } | 
 | 1373 |  | 
 | 1374 |     if (i == self->extra->length) { | 
 | 1375 |         /* element is not in children, so raise exception */ | 
 | 1376 |         PyErr_SetString( | 
 | 1377 |             PyExc_ValueError, | 
 | 1378 |             "list.remove(x): x not in list" | 
 | 1379 |             ); | 
 | 1380 |         return NULL; | 
 | 1381 |     } | 
 | 1382 |  | 
 | 1383 |     Py_DECREF(self->extra->children[i]); | 
 | 1384 |  | 
 | 1385 |     self->extra->length--; | 
 | 1386 |  | 
 | 1387 |     for (; i < self->extra->length; i++) | 
 | 1388 |         self->extra->children[i] = self->extra->children[i+1]; | 
 | 1389 |  | 
 | 1390 |     Py_RETURN_NONE; | 
 | 1391 | } | 
 | 1392 |  | 
 | 1393 | static PyObject* | 
 | 1394 | element_repr(ElementObject* self) | 
 | 1395 | { | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1396 |     if (self->tag) | 
 | 1397 |         return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); | 
 | 1398 |     else | 
 | 1399 |         return PyUnicode_FromFormat("<Element at %p>", self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1400 | } | 
 | 1401 |  | 
 | 1402 | static PyObject* | 
 | 1403 | element_set(ElementObject* self, PyObject* args) | 
 | 1404 | { | 
 | 1405 |     PyObject* attrib; | 
 | 1406 |  | 
 | 1407 |     PyObject* key; | 
 | 1408 |     PyObject* value; | 
 | 1409 |     if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) | 
 | 1410 |         return NULL; | 
 | 1411 |  | 
 | 1412 |     if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1413 |         create_extra(self, NULL); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1414 |  | 
 | 1415 |     attrib = element_get_attrib(self); | 
 | 1416 |     if (!attrib) | 
 | 1417 |         return NULL; | 
 | 1418 |  | 
 | 1419 |     if (PyDict_SetItem(attrib, key, value) < 0) | 
 | 1420 |         return NULL; | 
 | 1421 |  | 
 | 1422 |     Py_RETURN_NONE; | 
 | 1423 | } | 
 | 1424 |  | 
 | 1425 | static int | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1426 | element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1427 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1428 |     ElementObject* self = (ElementObject*) self_; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1429 |     int i; | 
 | 1430 |     PyObject* old; | 
 | 1431 |  | 
 | 1432 |     if (!self->extra || index < 0 || index >= self->extra->length) { | 
 | 1433 |         PyErr_SetString( | 
 | 1434 |             PyExc_IndexError, | 
 | 1435 |             "child assignment index out of range"); | 
 | 1436 |         return -1; | 
 | 1437 |     } | 
 | 1438 |  | 
 | 1439 |     old = self->extra->children[index]; | 
 | 1440 |  | 
 | 1441 |     if (item) { | 
 | 1442 |         Py_INCREF(item); | 
 | 1443 |         self->extra->children[index] = item; | 
 | 1444 |     } else { | 
 | 1445 |         self->extra->length--; | 
 | 1446 |         for (i = index; i < self->extra->length; i++) | 
 | 1447 |             self->extra->children[i] = self->extra->children[i+1]; | 
 | 1448 |     } | 
 | 1449 |  | 
 | 1450 |     Py_DECREF(old); | 
 | 1451 |  | 
 | 1452 |     return 0; | 
 | 1453 | } | 
 | 1454 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1455 | static PyObject* | 
 | 1456 | element_subscr(PyObject* self_, PyObject* item) | 
 | 1457 | { | 
 | 1458 |     ElementObject* self = (ElementObject*) self_; | 
 | 1459 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1460 |     if (PyIndex_Check(item)) { | 
 | 1461 |         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1462 |  | 
 | 1463 |         if (i == -1 && PyErr_Occurred()) { | 
 | 1464 |             return NULL; | 
 | 1465 |         } | 
 | 1466 |         if (i < 0 && self->extra) | 
 | 1467 |             i += self->extra->length; | 
 | 1468 |         return element_getitem(self_, i); | 
 | 1469 |     } | 
 | 1470 |     else if (PySlice_Check(item)) { | 
 | 1471 |         Py_ssize_t start, stop, step, slicelen, cur, i; | 
 | 1472 |         PyObject* list; | 
 | 1473 |  | 
 | 1474 |         if (!self->extra) | 
 | 1475 |             return PyList_New(0); | 
 | 1476 |  | 
| Martin v. Löwis | 4d0d471 | 2010-12-03 20:14:31 +0000 | [diff] [blame] | 1477 |         if (PySlice_GetIndicesEx(item, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1478 |                 self->extra->length, | 
 | 1479 |                 &start, &stop, &step, &slicelen) < 0) { | 
 | 1480 |             return NULL; | 
 | 1481 |         } | 
 | 1482 |  | 
 | 1483 |         if (slicelen <= 0) | 
 | 1484 |             return PyList_New(0); | 
 | 1485 |         else { | 
 | 1486 |             list = PyList_New(slicelen); | 
 | 1487 |             if (!list) | 
 | 1488 |                 return NULL; | 
 | 1489 |  | 
 | 1490 |             for (cur = start, i = 0; i < slicelen; | 
 | 1491 |                  cur += step, i++) { | 
 | 1492 |                 PyObject* item = self->extra->children[cur]; | 
 | 1493 |                 Py_INCREF(item); | 
 | 1494 |                 PyList_SET_ITEM(list, i, item); | 
 | 1495 |             } | 
 | 1496 |  | 
 | 1497 |             return list; | 
 | 1498 |         } | 
 | 1499 |     } | 
 | 1500 |     else { | 
 | 1501 |         PyErr_SetString(PyExc_TypeError, | 
 | 1502 |                 "element indices must be integers"); | 
 | 1503 |         return NULL; | 
 | 1504 |     } | 
 | 1505 | } | 
 | 1506 |  | 
 | 1507 | static int | 
 | 1508 | element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) | 
 | 1509 | { | 
 | 1510 |     ElementObject* self = (ElementObject*) self_; | 
 | 1511 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1512 |     if (PyIndex_Check(item)) { | 
 | 1513 |         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1514 |  | 
 | 1515 |         if (i == -1 && PyErr_Occurred()) { | 
 | 1516 |             return -1; | 
 | 1517 |         } | 
 | 1518 |         if (i < 0 && self->extra) | 
 | 1519 |             i += self->extra->length; | 
 | 1520 |         return element_setitem(self_, i, value); | 
 | 1521 |     } | 
 | 1522 |     else if (PySlice_Check(item)) { | 
 | 1523 |         Py_ssize_t start, stop, step, slicelen, newlen, cur, i; | 
 | 1524 |  | 
 | 1525 |         PyObject* recycle = NULL; | 
 | 1526 |         PyObject* seq = NULL; | 
 | 1527 |  | 
 | 1528 |         if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1529 |             create_extra(self, NULL); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1530 |  | 
| Martin v. Löwis | 4d0d471 | 2010-12-03 20:14:31 +0000 | [diff] [blame] | 1531 |         if (PySlice_GetIndicesEx(item, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1532 |                 self->extra->length, | 
 | 1533 |                 &start, &stop, &step, &slicelen) < 0) { | 
 | 1534 |             return -1; | 
 | 1535 |         } | 
 | 1536 |  | 
| Eli Bendersky | 865756a | 2012-03-09 13:38:15 +0200 | [diff] [blame] | 1537 |         if (value == NULL) { | 
 | 1538 |             /* Delete slice */ | 
 | 1539 |             size_t cur; | 
 | 1540 |             Py_ssize_t i; | 
 | 1541 |  | 
 | 1542 |             if (slicelen <= 0) | 
 | 1543 |                 return 0; | 
 | 1544 |  | 
 | 1545 |             /* Since we're deleting, the direction of the range doesn't matter, | 
 | 1546 |              * so for simplicity make it always ascending. | 
 | 1547 |             */ | 
 | 1548 |             if (step < 0) { | 
 | 1549 |                 stop = start + 1; | 
 | 1550 |                 start = stop + step * (slicelen - 1) - 1; | 
 | 1551 |                 step = -step; | 
 | 1552 |             } | 
 | 1553 |  | 
 | 1554 |             assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *)); | 
 | 1555 |  | 
 | 1556 |             /* recycle is a list that will contain all the children | 
 | 1557 |              * scheduled for removal. | 
 | 1558 |             */ | 
 | 1559 |             if (!(recycle = PyList_New(slicelen))) { | 
 | 1560 |                 PyErr_NoMemory(); | 
 | 1561 |                 return -1; | 
 | 1562 |             } | 
 | 1563 |  | 
 | 1564 |             /* This loop walks over all the children that have to be deleted, | 
 | 1565 |              * with cur pointing at them. num_moved is the amount of children | 
 | 1566 |              * until the next deleted child that have to be "shifted down" to | 
 | 1567 |              * occupy the deleted's places. | 
 | 1568 |              * Note that in the ith iteration, shifting is done i+i places down | 
 | 1569 |              * because i children were already removed. | 
 | 1570 |             */ | 
 | 1571 |             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { | 
 | 1572 |                 /* Compute how many children have to be moved, clipping at the | 
 | 1573 |                  * list end. | 
 | 1574 |                 */ | 
 | 1575 |                 Py_ssize_t num_moved = step - 1; | 
 | 1576 |                 if (cur + step >= (size_t)self->extra->length) { | 
 | 1577 |                     num_moved = self->extra->length - cur - 1; | 
 | 1578 |                 } | 
 | 1579 |  | 
 | 1580 |                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); | 
 | 1581 |  | 
 | 1582 |                 memmove( | 
 | 1583 |                     self->extra->children + cur - i, | 
 | 1584 |                     self->extra->children + cur + 1, | 
 | 1585 |                     num_moved * sizeof(PyObject *)); | 
 | 1586 |             } | 
 | 1587 |  | 
 | 1588 |             /* Leftover "tail" after the last removed child */ | 
 | 1589 |             cur = start + (size_t)slicelen * step; | 
 | 1590 |             if (cur < (size_t)self->extra->length) { | 
 | 1591 |                 memmove( | 
 | 1592 |                     self->extra->children + cur - slicelen, | 
 | 1593 |                     self->extra->children + cur, | 
 | 1594 |                     (self->extra->length - cur) * sizeof(PyObject *)); | 
 | 1595 |             } | 
 | 1596 |  | 
 | 1597 |             self->extra->length -= slicelen; | 
 | 1598 |  | 
 | 1599 |             /* Discard the recycle list with all the deleted sub-elements */ | 
 | 1600 |             Py_XDECREF(recycle); | 
 | 1601 |             return 0; | 
 | 1602 |         } | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1603 |         else { | 
| Eli Bendersky | 865756a | 2012-03-09 13:38:15 +0200 | [diff] [blame] | 1604 |             /* A new slice is actually being assigned */ | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1605 |             seq = PySequence_Fast(value, ""); | 
 | 1606 |             if (!seq) { | 
 | 1607 |                 PyErr_Format( | 
 | 1608 |                     PyExc_TypeError, | 
 | 1609 |                     "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name | 
 | 1610 |                     ); | 
 | 1611 |                 return -1; | 
 | 1612 |             } | 
 | 1613 |             newlen = PySequence_Size(seq); | 
 | 1614 |         } | 
 | 1615 |  | 
 | 1616 |         if (step !=  1 && newlen != slicelen) | 
 | 1617 |         { | 
 | 1618 |             PyErr_Format(PyExc_ValueError, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1619 |                 "attempt to assign sequence of size %zd " | 
 | 1620 |                 "to extended slice of size %zd", | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1621 |                 newlen, slicelen | 
 | 1622 |                 ); | 
 | 1623 |             return -1; | 
 | 1624 |         } | 
 | 1625 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1626 |         /* Resize before creating the recycle bin, to prevent refleaks. */ | 
 | 1627 |         if (newlen > slicelen) { | 
 | 1628 |             if (element_resize(self, newlen - slicelen) < 0) { | 
 | 1629 |                 if (seq) { | 
 | 1630 |                     Py_DECREF(seq); | 
 | 1631 |                 } | 
 | 1632 |                 return -1; | 
 | 1633 |             } | 
 | 1634 |         } | 
 | 1635 |  | 
 | 1636 |         if (slicelen > 0) { | 
 | 1637 |             /* to avoid recursive calls to this method (via decref), move | 
 | 1638 |                old items to the recycle bin here, and get rid of them when | 
 | 1639 |                we're done modifying the element */ | 
 | 1640 |             recycle = PyList_New(slicelen); | 
 | 1641 |             if (!recycle) { | 
 | 1642 |                 if (seq) { | 
 | 1643 |                     Py_DECREF(seq); | 
 | 1644 |                 } | 
 | 1645 |                 return -1; | 
 | 1646 |             } | 
 | 1647 |             for (cur = start, i = 0; i < slicelen; | 
 | 1648 |                  cur += step, i++) | 
 | 1649 |                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); | 
 | 1650 |         } | 
 | 1651 |  | 
 | 1652 |         if (newlen < slicelen) { | 
 | 1653 |             /* delete slice */ | 
 | 1654 |             for (i = stop; i < self->extra->length; i++) | 
 | 1655 |                 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; | 
 | 1656 |         } else if (newlen > slicelen) { | 
 | 1657 |             /* insert slice */ | 
 | 1658 |             for (i = self->extra->length-1; i >= stop; i--) | 
 | 1659 |                 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; | 
 | 1660 |         } | 
 | 1661 |  | 
 | 1662 |         /* replace the slice */ | 
 | 1663 |         for (cur = start, i = 0; i < newlen; | 
 | 1664 |              cur += step, i++) { | 
 | 1665 |             PyObject* element = PySequence_Fast_GET_ITEM(seq, i); | 
 | 1666 |             Py_INCREF(element); | 
 | 1667 |             self->extra->children[cur] = element; | 
 | 1668 |         } | 
 | 1669 |  | 
 | 1670 |         self->extra->length += newlen - slicelen; | 
 | 1671 |  | 
 | 1672 |         if (seq) { | 
 | 1673 |             Py_DECREF(seq); | 
 | 1674 |         } | 
 | 1675 |  | 
 | 1676 |         /* discard the recycle bin, and everything in it */ | 
 | 1677 |         Py_XDECREF(recycle); | 
 | 1678 |  | 
 | 1679 |         return 0; | 
 | 1680 |     } | 
 | 1681 |     else { | 
 | 1682 |         PyErr_SetString(PyExc_TypeError, | 
 | 1683 |                 "element indices must be integers"); | 
 | 1684 |         return -1; | 
 | 1685 |     } | 
 | 1686 | } | 
 | 1687 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1688 | static PyMethodDef element_methods[] = { | 
 | 1689 |  | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 1690 |     {"clear", (PyCFunction) element_clearmethod, METH_VARARGS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1691 |  | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1692 |     {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1693 |     {"set", (PyCFunction) element_set, METH_VARARGS}, | 
 | 1694 |  | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1695 |     {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS}, | 
 | 1696 |     {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS}, | 
 | 1697 |     {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1698 |  | 
 | 1699 |     {"append", (PyCFunction) element_append, METH_VARARGS}, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1700 |     {"extend", (PyCFunction) element_extend, METH_VARARGS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1701 |     {"insert", (PyCFunction) element_insert, METH_VARARGS}, | 
 | 1702 |     {"remove", (PyCFunction) element_remove, METH_VARARGS}, | 
 | 1703 |  | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1704 |     {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS}, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1705 |     {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, | 
| Eli Bendersky | 737b173 | 2012-05-29 06:02:56 +0300 | [diff] [blame] | 1706 |     {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS}, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1707 |  | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 1708 |     {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1709 |     {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS}, | 
 | 1710 |  | 
 | 1711 |     {"items", (PyCFunction) element_items, METH_VARARGS}, | 
 | 1712 |     {"keys", (PyCFunction) element_keys, METH_VARARGS}, | 
 | 1713 |  | 
 | 1714 |     {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS}, | 
 | 1715 |  | 
 | 1716 |     {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, | 
 | 1717 |     {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, | 
| Martin v. Löwis | bce1666 | 2012-06-17 10:41:22 +0200 | [diff] [blame] | 1718 |     {"__sizeof__", element_sizeof, METH_NOARGS}, | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 1719 |     {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS}, | 
 | 1720 |     {"__setstate__", (PyCFunction)element_setstate, METH_O}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1721 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1722 |     {NULL, NULL} | 
 | 1723 | }; | 
 | 1724 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1725 | static PyObject* | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 1726 | element_getattro(ElementObject* self, PyObject* nameobj) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1727 | { | 
 | 1728 |     PyObject* res; | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 1729 |     char *name = ""; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1730 |  | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 1731 |     if (PyUnicode_Check(nameobj)) | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1732 |         name = _PyUnicode_AsString(nameobj); | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 1733 |  | 
| Alexander Belopolsky | e239d23 | 2010-12-08 23:31:48 +0000 | [diff] [blame] | 1734 |     if (name == NULL) | 
 | 1735 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1736 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1737 |     /* handle common attributes first */ | 
 | 1738 |     if (strcmp(name, "tag") == 0) { | 
 | 1739 |         res = self->tag; | 
 | 1740 |         Py_INCREF(res); | 
 | 1741 |         return res; | 
 | 1742 |     } else if (strcmp(name, "text") == 0) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1743 |         res = element_get_text(self); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1744 |         Py_INCREF(res); | 
 | 1745 |         return res; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1746 |     } | 
 | 1747 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1748 |     /* methods */ | 
 | 1749 |     res = PyObject_GenericGetAttr((PyObject*) self, nameobj); | 
 | 1750 |     if (res) | 
 | 1751 |         return res; | 
 | 1752 |  | 
 | 1753 |     /* less common attributes */ | 
 | 1754 |     if (strcmp(name, "tail") == 0) { | 
 | 1755 |         PyErr_Clear(); | 
 | 1756 |         res = element_get_tail(self); | 
 | 1757 |     } else if (strcmp(name, "attrib") == 0) { | 
 | 1758 |         PyErr_Clear(); | 
 | 1759 |         if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1760 |             create_extra(self, NULL); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1761 |         res = element_get_attrib(self); | 
 | 1762 |     } | 
 | 1763 |  | 
 | 1764 |     if (!res) | 
 | 1765 |         return NULL; | 
 | 1766 |  | 
 | 1767 |     Py_INCREF(res); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1768 |     return res; | 
 | 1769 | } | 
 | 1770 |  | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1771 | static PyObject* | 
 | 1772 | element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1773 | { | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1774 |     char *name = ""; | 
 | 1775 |     if (PyUnicode_Check(nameobj)) | 
 | 1776 |         name = _PyUnicode_AsString(nameobj); | 
 | 1777 |  | 
 | 1778 |     if (name == NULL) | 
 | 1779 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1780 |  | 
 | 1781 |     if (strcmp(name, "tag") == 0) { | 
 | 1782 |         Py_DECREF(self->tag); | 
 | 1783 |         self->tag = value; | 
 | 1784 |         Py_INCREF(self->tag); | 
 | 1785 |     } else if (strcmp(name, "text") == 0) { | 
 | 1786 |         Py_DECREF(JOIN_OBJ(self->text)); | 
 | 1787 |         self->text = value; | 
 | 1788 |         Py_INCREF(self->text); | 
 | 1789 |     } else if (strcmp(name, "tail") == 0) { | 
 | 1790 |         Py_DECREF(JOIN_OBJ(self->tail)); | 
 | 1791 |         self->tail = value; | 
 | 1792 |         Py_INCREF(self->tail); | 
 | 1793 |     } else if (strcmp(name, "attrib") == 0) { | 
 | 1794 |         if (!self->extra) | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1795 |             create_extra(self, NULL); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1796 |         Py_DECREF(self->extra->attrib); | 
 | 1797 |         self->extra->attrib = value; | 
 | 1798 |         Py_INCREF(self->extra->attrib); | 
 | 1799 |     } else { | 
 | 1800 |         PyErr_SetString(PyExc_AttributeError, name); | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1801 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1802 |     } | 
 | 1803 |  | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1804 |     return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1805 | } | 
 | 1806 |  | 
 | 1807 | static PySequenceMethods element_as_sequence = { | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 1808 |     (lenfunc) element_length, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1809 |     0, /* sq_concat */ | 
 | 1810 |     0, /* sq_repeat */ | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 1811 |     element_getitem, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1812 |     0, | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 1813 |     element_setitem, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 1814 |     0, | 
 | 1815 | }; | 
 | 1816 |  | 
 | 1817 | static PyMappingMethods element_as_mapping = { | 
 | 1818 |     (lenfunc) element_length, | 
 | 1819 |     (binaryfunc) element_subscr, | 
 | 1820 |     (objobjargproc) element_ass_subscr, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1821 | }; | 
 | 1822 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 1823 | static PyTypeObject Element_Type = { | 
| Martin v. Löwis | 9f2e346 | 2007-07-21 17:22:18 +0000 | [diff] [blame] | 1824 |     PyVarObject_HEAD_INIT(NULL, 0) | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 1825 |     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1826 |     /* methods */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1827 |     (destructor)element_dealloc,                    /* tp_dealloc */ | 
 | 1828 |     0,                                              /* tp_print */ | 
 | 1829 |     0,                                              /* tp_getattr */ | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1830 |     0,                                              /* tp_setattr */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1831 |     0,                                              /* tp_reserved */ | 
 | 1832 |     (reprfunc)element_repr,                         /* tp_repr */ | 
 | 1833 |     0,                                              /* tp_as_number */ | 
 | 1834 |     &element_as_sequence,                           /* tp_as_sequence */ | 
 | 1835 |     &element_as_mapping,                            /* tp_as_mapping */ | 
 | 1836 |     0,                                              /* tp_hash */ | 
 | 1837 |     0,                                              /* tp_call */ | 
 | 1838 |     0,                                              /* tp_str */ | 
 | 1839 |     (getattrofunc)element_getattro,                 /* tp_getattro */ | 
| Eli Bendersky | b20df95 | 2012-05-20 06:33:29 +0300 | [diff] [blame] | 1840 |     (setattrofunc)element_setattro,                 /* tp_setattro */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1841 |     0,                                              /* tp_as_buffer */ | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 1842 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, | 
 | 1843 |                                                     /* tp_flags */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1844 |     0,                                              /* tp_doc */ | 
| Eli Bendersky | 0192ba3 | 2012-03-30 16:38:33 +0300 | [diff] [blame] | 1845 |     (traverseproc)element_gc_traverse,              /* tp_traverse */ | 
 | 1846 |     (inquiry)element_gc_clear,                      /* tp_clear */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1847 |     0,                                              /* tp_richcompare */ | 
| Eli Bendersky | ebf37a2 | 2012-04-03 22:02:37 +0300 | [diff] [blame] | 1848 |     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */ | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 1849 |     0,                                              /* tp_iter */ | 
 | 1850 |     0,                                              /* tp_iternext */ | 
 | 1851 |     element_methods,                                /* tp_methods */ | 
 | 1852 |     0,                                              /* tp_members */ | 
 | 1853 |     0,                                              /* tp_getset */ | 
 | 1854 |     0,                                              /* tp_base */ | 
 | 1855 |     0,                                              /* tp_dict */ | 
 | 1856 |     0,                                              /* tp_descr_get */ | 
 | 1857 |     0,                                              /* tp_descr_set */ | 
 | 1858 |     0,                                              /* tp_dictoffset */ | 
 | 1859 |     (initproc)element_init,                         /* tp_init */ | 
 | 1860 |     PyType_GenericAlloc,                            /* tp_alloc */ | 
 | 1861 |     element_new,                                    /* tp_new */ | 
 | 1862 |     0,                                              /* tp_free */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1863 | }; | 
 | 1864 |  | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1865 | /******************************* Element iterator ****************************/ | 
 | 1866 |  | 
 | 1867 | /* ElementIterObject represents the iteration state over an XML element in | 
 | 1868 |  * pre-order traversal. To keep track of which sub-element should be returned | 
 | 1869 |  * next, a stack of parents is maintained. This is a standard stack-based | 
 | 1870 |  * iterative pre-order traversal of a tree. | 
 | 1871 |  * The stack is managed using a single-linked list starting at parent_stack. | 
 | 1872 |  * Each stack node contains the saved parent to which we should return after | 
 | 1873 |  * the current one is exhausted, and the next child to examine in that parent. | 
 | 1874 |  */ | 
 | 1875 | typedef struct ParentLocator_t { | 
 | 1876 |     ElementObject *parent; | 
 | 1877 |     Py_ssize_t child_index; | 
 | 1878 |     struct ParentLocator_t *next; | 
 | 1879 | } ParentLocator; | 
 | 1880 |  | 
 | 1881 | typedef struct { | 
 | 1882 |     PyObject_HEAD | 
 | 1883 |     ParentLocator *parent_stack; | 
 | 1884 |     ElementObject *root_element; | 
 | 1885 |     PyObject *sought_tag; | 
 | 1886 |     int root_done; | 
 | 1887 |     int gettext; | 
 | 1888 | } ElementIterObject; | 
 | 1889 |  | 
 | 1890 |  | 
 | 1891 | static void | 
 | 1892 | elementiter_dealloc(ElementIterObject *it) | 
 | 1893 | { | 
 | 1894 |     ParentLocator *p = it->parent_stack; | 
 | 1895 |     while (p) { | 
 | 1896 |         ParentLocator *temp = p; | 
 | 1897 |         Py_XDECREF(p->parent); | 
 | 1898 |         p = p->next; | 
 | 1899 |         PyObject_Free(temp); | 
 | 1900 |     } | 
 | 1901 |  | 
 | 1902 |     Py_XDECREF(it->sought_tag); | 
 | 1903 |     Py_XDECREF(it->root_element); | 
 | 1904 |  | 
 | 1905 |     PyObject_GC_UnTrack(it); | 
 | 1906 |     PyObject_GC_Del(it); | 
 | 1907 | } | 
 | 1908 |  | 
 | 1909 | static int | 
 | 1910 | elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg) | 
 | 1911 | { | 
 | 1912 |     ParentLocator *p = it->parent_stack; | 
 | 1913 |     while (p) { | 
 | 1914 |         Py_VISIT(p->parent); | 
 | 1915 |         p = p->next; | 
 | 1916 |     } | 
 | 1917 |  | 
 | 1918 |     Py_VISIT(it->root_element); | 
 | 1919 |     Py_VISIT(it->sought_tag); | 
 | 1920 |     return 0; | 
 | 1921 | } | 
 | 1922 |  | 
 | 1923 | /* Helper function for elementiter_next. Add a new parent to the parent stack. | 
 | 1924 |  */ | 
 | 1925 | static ParentLocator * | 
 | 1926 | parent_stack_push_new(ParentLocator *stack, ElementObject *parent) | 
 | 1927 | { | 
 | 1928 |     ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator)); | 
 | 1929 |     if (new_node) { | 
 | 1930 |         new_node->parent = parent; | 
 | 1931 |         Py_INCREF(parent); | 
 | 1932 |         new_node->child_index = 0; | 
 | 1933 |         new_node->next = stack; | 
 | 1934 |     } | 
 | 1935 |     return new_node; | 
 | 1936 | } | 
 | 1937 |  | 
 | 1938 | static PyObject * | 
 | 1939 | elementiter_next(ElementIterObject *it) | 
 | 1940 | { | 
 | 1941 |     /* Sub-element iterator. | 
| Eli Bendersky | 4583990 | 2013-01-13 05:14:47 -0800 | [diff] [blame] | 1942 |      * | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1943 |      * A short note on gettext: this function serves both the iter() and | 
 | 1944 |      * itertext() methods to avoid code duplication. However, there are a few | 
 | 1945 |      * small differences in the way these iterations work. Namely: | 
 | 1946 |      *   - itertext() only yields text from nodes that have it, and continues | 
 | 1947 |      *     iterating when a node doesn't have text (so it doesn't return any | 
 | 1948 |      *     node like iter()) | 
 | 1949 |      *   - itertext() also has to handle tail, after finishing with all the | 
 | 1950 |      *     children of a node. | 
 | 1951 |      */ | 
| Eli Bendersky | 113da64 | 2012-06-15 07:52:49 +0300 | [diff] [blame] | 1952 |     ElementObject *cur_parent; | 
 | 1953 |     Py_ssize_t child_index; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1954 |  | 
 | 1955 |     while (1) { | 
 | 1956 |         /* Handle the case reached in the beginning and end of iteration, where | 
 | 1957 |          * the parent stack is empty. The root_done flag gives us indication | 
 | 1958 |          * whether we've just started iterating (so root_done is 0), in which | 
 | 1959 |          * case the root is returned. If root_done is 1 and we're here, the | 
 | 1960 |          * iterator is exhausted. | 
 | 1961 |          */ | 
 | 1962 |         if (!it->parent_stack->parent) { | 
 | 1963 |             if (it->root_done) { | 
 | 1964 |                 PyErr_SetNone(PyExc_StopIteration); | 
 | 1965 |                 return NULL; | 
 | 1966 |             } else { | 
 | 1967 |                 it->parent_stack = parent_stack_push_new(it->parent_stack, | 
 | 1968 |                                                          it->root_element); | 
 | 1969 |                 if (!it->parent_stack) { | 
 | 1970 |                     PyErr_NoMemory(); | 
 | 1971 |                     return NULL; | 
 | 1972 |                 } | 
 | 1973 |  | 
 | 1974 |                 it->root_done = 1; | 
 | 1975 |                 if (it->sought_tag == Py_None || | 
 | 1976 |                     PyObject_RichCompareBool(it->root_element->tag, | 
 | 1977 |                                              it->sought_tag, Py_EQ) == 1) { | 
 | 1978 |                     if (it->gettext) { | 
| Eli Bendersky | e6174ca | 2013-01-10 06:27:53 -0800 | [diff] [blame] | 1979 |                         PyObject *text = element_get_text(it->root_element); | 
 | 1980 |                         if (!text) | 
 | 1981 |                             return NULL; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1982 |                         if (PyObject_IsTrue(text)) { | 
 | 1983 |                             Py_INCREF(text); | 
 | 1984 |                             return text; | 
 | 1985 |                         } | 
 | 1986 |                     } else { | 
 | 1987 |                         Py_INCREF(it->root_element); | 
 | 1988 |                         return (PyObject *)it->root_element; | 
 | 1989 |                     } | 
 | 1990 |                 } | 
 | 1991 |             } | 
 | 1992 |         } | 
 | 1993 |  | 
 | 1994 |         /* See if there are children left to traverse in the current parent. If | 
 | 1995 |          * yes, visit the next child. If not, pop the stack and try again. | 
 | 1996 |          */ | 
| Eli Bendersky | 113da64 | 2012-06-15 07:52:49 +0300 | [diff] [blame] | 1997 |         cur_parent = it->parent_stack->parent; | 
 | 1998 |         child_index = it->parent_stack->child_index; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 1999 |         if (cur_parent->extra && child_index < cur_parent->extra->length) { | 
 | 2000 |             ElementObject *child = (ElementObject *) | 
 | 2001 |                 cur_parent->extra->children[child_index]; | 
 | 2002 |             it->parent_stack->child_index++; | 
 | 2003 |             it->parent_stack = parent_stack_push_new(it->parent_stack, | 
 | 2004 |                                                      child); | 
 | 2005 |             if (!it->parent_stack) { | 
 | 2006 |                 PyErr_NoMemory(); | 
 | 2007 |                 return NULL; | 
 | 2008 |             } | 
 | 2009 |  | 
 | 2010 |             if (it->gettext) { | 
| Eli Bendersky | e6174ca | 2013-01-10 06:27:53 -0800 | [diff] [blame] | 2011 |                 PyObject *text = element_get_text(child); | 
 | 2012 |                 if (!text) | 
 | 2013 |                     return NULL; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 2014 |                 if (PyObject_IsTrue(text)) { | 
 | 2015 |                     Py_INCREF(text); | 
 | 2016 |                     return text; | 
 | 2017 |                 } | 
 | 2018 |             } else if (it->sought_tag == Py_None || | 
 | 2019 |                 PyObject_RichCompareBool(child->tag, | 
 | 2020 |                                          it->sought_tag, Py_EQ) == 1) { | 
 | 2021 |                 Py_INCREF(child); | 
 | 2022 |                 return (PyObject *)child; | 
 | 2023 |             } | 
 | 2024 |             else | 
 | 2025 |                 continue; | 
 | 2026 |         } | 
 | 2027 |         else { | 
| Eli Bendersky | e6174ca | 2013-01-10 06:27:53 -0800 | [diff] [blame] | 2028 |             PyObject *tail; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 2029 |             ParentLocator *next = it->parent_stack->next; | 
| Eli Bendersky | e6174ca | 2013-01-10 06:27:53 -0800 | [diff] [blame] | 2030 |             if (it->gettext) { | 
 | 2031 |                 tail = element_get_tail(cur_parent); | 
 | 2032 |                 if (!tail) | 
 | 2033 |                     return NULL; | 
 | 2034 |             } | 
 | 2035 |             else | 
 | 2036 |                 tail = Py_None; | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 2037 |             Py_XDECREF(it->parent_stack->parent); | 
 | 2038 |             PyObject_Free(it->parent_stack); | 
 | 2039 |             it->parent_stack = next; | 
 | 2040 |  | 
 | 2041 |             /* Note that extra condition on it->parent_stack->parent here; | 
 | 2042 |              * this is because itertext() is supposed to only return *inner* | 
 | 2043 |              * text, not text following the element it began iteration with. | 
 | 2044 |              */ | 
 | 2045 |             if (it->parent_stack->parent && PyObject_IsTrue(tail)) { | 
 | 2046 |                 Py_INCREF(tail); | 
 | 2047 |                 return tail; | 
 | 2048 |             } | 
 | 2049 |         } | 
 | 2050 |     } | 
 | 2051 |  | 
 | 2052 |     return NULL; | 
 | 2053 | } | 
 | 2054 |  | 
 | 2055 |  | 
 | 2056 | static PyTypeObject ElementIter_Type = { | 
 | 2057 |     PyVarObject_HEAD_INIT(NULL, 0) | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 2058 |     /* Using the module's name since the pure-Python implementation does not | 
 | 2059 |        have such a type. */ | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 2060 |     "_elementtree._element_iterator",           /* tp_name */ | 
 | 2061 |     sizeof(ElementIterObject),                  /* tp_basicsize */ | 
 | 2062 |     0,                                          /* tp_itemsize */ | 
 | 2063 |     /* methods */ | 
 | 2064 |     (destructor)elementiter_dealloc,            /* tp_dealloc */ | 
 | 2065 |     0,                                          /* tp_print */ | 
 | 2066 |     0,                                          /* tp_getattr */ | 
 | 2067 |     0,                                          /* tp_setattr */ | 
 | 2068 |     0,                                          /* tp_reserved */ | 
 | 2069 |     0,                                          /* tp_repr */ | 
 | 2070 |     0,                                          /* tp_as_number */ | 
 | 2071 |     0,                                          /* tp_as_sequence */ | 
 | 2072 |     0,                                          /* tp_as_mapping */ | 
 | 2073 |     0,                                          /* tp_hash */ | 
 | 2074 |     0,                                          /* tp_call */ | 
 | 2075 |     0,                                          /* tp_str */ | 
 | 2076 |     0,                                          /* tp_getattro */ | 
 | 2077 |     0,                                          /* tp_setattro */ | 
 | 2078 |     0,                                          /* tp_as_buffer */ | 
 | 2079 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */ | 
 | 2080 |     0,                                          /* tp_doc */ | 
 | 2081 |     (traverseproc)elementiter_traverse,         /* tp_traverse */ | 
 | 2082 |     0,                                          /* tp_clear */ | 
 | 2083 |     0,                                          /* tp_richcompare */ | 
 | 2084 |     0,                                          /* tp_weaklistoffset */ | 
 | 2085 |     PyObject_SelfIter,                          /* tp_iter */ | 
 | 2086 |     (iternextfunc)elementiter_next,             /* tp_iternext */ | 
 | 2087 |     0,                                          /* tp_methods */ | 
 | 2088 |     0,                                          /* tp_members */ | 
 | 2089 |     0,                                          /* tp_getset */ | 
 | 2090 |     0,                                          /* tp_base */ | 
 | 2091 |     0,                                          /* tp_dict */ | 
 | 2092 |     0,                                          /* tp_descr_get */ | 
 | 2093 |     0,                                          /* tp_descr_set */ | 
 | 2094 |     0,                                          /* tp_dictoffset */ | 
 | 2095 |     0,                                          /* tp_init */ | 
 | 2096 |     0,                                          /* tp_alloc */ | 
 | 2097 |     0,                                          /* tp_new */ | 
 | 2098 | }; | 
 | 2099 |  | 
 | 2100 |  | 
 | 2101 | static PyObject * | 
 | 2102 | create_elementiter(ElementObject *self, PyObject *tag, int gettext) | 
 | 2103 | { | 
 | 2104 |     ElementIterObject *it; | 
 | 2105 |     PyObject *star = NULL; | 
 | 2106 |  | 
 | 2107 |     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type); | 
 | 2108 |     if (!it) | 
 | 2109 |         return NULL; | 
 | 2110 |     if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) { | 
 | 2111 |         PyObject_GC_Del(it); | 
 | 2112 |         return NULL; | 
 | 2113 |     } | 
 | 2114 |  | 
 | 2115 |     it->parent_stack->parent = NULL; | 
 | 2116 |     it->parent_stack->child_index = 0; | 
 | 2117 |     it->parent_stack->next = NULL; | 
 | 2118 |  | 
 | 2119 |     if (PyUnicode_Check(tag)) | 
 | 2120 |         star = PyUnicode_FromString("*"); | 
 | 2121 |     else if (PyBytes_Check(tag)) | 
 | 2122 |         star = PyBytes_FromString("*"); | 
 | 2123 |  | 
 | 2124 |     if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1) | 
 | 2125 |         tag = Py_None; | 
 | 2126 |  | 
 | 2127 |     Py_XDECREF(star); | 
 | 2128 |     it->sought_tag = tag; | 
 | 2129 |     it->root_done = 0; | 
 | 2130 |     it->gettext = gettext; | 
 | 2131 |     it->root_element = self; | 
 | 2132 |  | 
 | 2133 |     Py_INCREF(self); | 
 | 2134 |     Py_INCREF(tag); | 
 | 2135 |  | 
 | 2136 |     PyObject_GC_Track(it); | 
 | 2137 |     return (PyObject *)it; | 
 | 2138 | } | 
 | 2139 |  | 
 | 2140 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2141 | /* ==================================================================== */ | 
 | 2142 | /* the tree builder type */ | 
 | 2143 |  | 
 | 2144 | typedef struct { | 
 | 2145 |     PyObject_HEAD | 
 | 2146 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2147 |     PyObject *root; /* root node (first created node) */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2148 |  | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2149 |     PyObject *this; /* current node */ | 
 | 2150 |     PyObject *last; /* most recently created node */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2151 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2152 |     PyObject *data; /* data collector (string or list), or NULL */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2153 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2154 |     PyObject *stack; /* element stack */ | 
 | 2155 |     Py_ssize_t index; /* current stack size (0 means empty) */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2156 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2157 |     PyObject *element_factory; | 
 | 2158 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2159 |     /* element tracing */ | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2160 |     PyObject *events; /* list of events, or NULL if not collecting */ | 
 | 2161 |     PyObject *start_event_obj; /* event objects (NULL to ignore) */ | 
 | 2162 |     PyObject *end_event_obj; | 
 | 2163 |     PyObject *start_ns_event_obj; | 
 | 2164 |     PyObject *end_ns_event_obj; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2165 | } TreeBuilderObject; | 
 | 2166 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 2167 | static PyTypeObject TreeBuilder_Type; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2168 |  | 
| Christian Heimes | 90aa764 | 2007-12-19 02:45:37 +0000 | [diff] [blame] | 2169 | #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2170 |  | 
 | 2171 | /* -------------------------------------------------------------------- */ | 
 | 2172 | /* constructor and destructor */ | 
 | 2173 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2174 | static PyObject * | 
 | 2175 | treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2176 | { | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2177 |     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0); | 
 | 2178 |     if (t != NULL) { | 
 | 2179 |         t->root = NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2180 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2181 |         Py_INCREF(Py_None); | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2182 |         t->this = Py_None; | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2183 |         Py_INCREF(Py_None); | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2184 |         t->last = Py_None; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2185 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2186 |         t->data = NULL; | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2187 |         t->element_factory = NULL; | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2188 |         t->stack = PyList_New(20); | 
 | 2189 |         if (!t->stack) { | 
 | 2190 |             Py_DECREF(t->this); | 
 | 2191 |             Py_DECREF(t->last); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2192 |             Py_DECREF((PyObject *) t); | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2193 |             return NULL; | 
 | 2194 |         } | 
 | 2195 |         t->index = 0; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2196 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2197 |         t->events = NULL; | 
 | 2198 |         t->start_event_obj = t->end_event_obj = NULL; | 
 | 2199 |         t->start_ns_event_obj = t->end_ns_event_obj = NULL; | 
 | 2200 |     } | 
 | 2201 |     return (PyObject *)t; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2202 | } | 
 | 2203 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2204 | static int | 
 | 2205 | treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2206 | { | 
| Eli Bendersky | c68e136 | 2012-06-03 06:09:42 +0300 | [diff] [blame] | 2207 |     static char *kwlist[] = {"element_factory", 0}; | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2208 |     PyObject *element_factory = NULL; | 
 | 2209 |     TreeBuilderObject *self_tb = (TreeBuilderObject *)self; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2210 |     PyObject *tmp; | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2211 |  | 
 | 2212 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist, | 
 | 2213 |                                      &element_factory)) { | 
 | 2214 |         return -1; | 
 | 2215 |     } | 
 | 2216 |  | 
 | 2217 |     if (element_factory) { | 
 | 2218 |         Py_INCREF(element_factory); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2219 |         tmp = self_tb->element_factory; | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2220 |         self_tb->element_factory = element_factory; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2221 |         Py_XDECREF(tmp); | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2222 |     } | 
 | 2223 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2224 |     return 0; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2225 | } | 
 | 2226 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2227 | static int | 
 | 2228 | treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) | 
 | 2229 | { | 
 | 2230 |     Py_VISIT(self->root); | 
 | 2231 |     Py_VISIT(self->this); | 
 | 2232 |     Py_VISIT(self->last); | 
 | 2233 |     Py_VISIT(self->data); | 
 | 2234 |     Py_VISIT(self->stack); | 
 | 2235 |     Py_VISIT(self->element_factory); | 
 | 2236 |     return 0; | 
 | 2237 | } | 
 | 2238 |  | 
 | 2239 | static int | 
 | 2240 | treebuilder_gc_clear(TreeBuilderObject *self) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2241 | { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2242 |     Py_CLEAR(self->end_ns_event_obj); | 
 | 2243 |     Py_CLEAR(self->start_ns_event_obj); | 
 | 2244 |     Py_CLEAR(self->end_event_obj); | 
 | 2245 |     Py_CLEAR(self->start_event_obj); | 
 | 2246 |     Py_CLEAR(self->events); | 
 | 2247 |     Py_CLEAR(self->stack); | 
 | 2248 |     Py_CLEAR(self->data); | 
 | 2249 |     Py_CLEAR(self->last); | 
 | 2250 |     Py_CLEAR(self->this); | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2251 |     Py_CLEAR(self->element_factory); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2252 |     Py_CLEAR(self->root); | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2253 |     return 0; | 
 | 2254 | } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2255 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2256 | static void | 
 | 2257 | treebuilder_dealloc(TreeBuilderObject *self) | 
 | 2258 | { | 
 | 2259 |     PyObject_GC_UnTrack(self); | 
 | 2260 |     treebuilder_gc_clear(self); | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 2261 |     Py_TYPE(self)->tp_free((PyObject *)self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2262 | } | 
 | 2263 |  | 
 | 2264 | /* -------------------------------------------------------------------- */ | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2265 | /* helpers for handling of arbitrary element-like objects */ | 
 | 2266 |  | 
 | 2267 | static int | 
 | 2268 | treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data, | 
 | 2269 |                                      PyObject **dest, _Py_Identifier *name) | 
 | 2270 | { | 
 | 2271 |     if (Element_CheckExact(element)) { | 
 | 2272 |         Py_DECREF(JOIN_OBJ(*dest)); | 
 | 2273 |         *dest = JOIN_SET(data, PyList_CheckExact(data)); | 
 | 2274 |         return 0; | 
 | 2275 |     } | 
 | 2276 |     else { | 
 | 2277 |         PyObject *joined = list_join(data); | 
 | 2278 |         int r; | 
 | 2279 |         if (joined == NULL) | 
 | 2280 |             return -1; | 
 | 2281 |         r = _PyObject_SetAttrId(element, name, joined); | 
 | 2282 |         Py_DECREF(joined); | 
 | 2283 |         return r; | 
 | 2284 |     } | 
 | 2285 | } | 
 | 2286 |  | 
 | 2287 | /* These two functions steal a reference to data */ | 
 | 2288 | static int | 
 | 2289 | treebuilder_set_element_text(PyObject *element, PyObject *data) | 
 | 2290 | { | 
 | 2291 |     _Py_IDENTIFIER(text); | 
 | 2292 |     return treebuilder_set_element_text_or_tail( | 
 | 2293 |         element, data, &((ElementObject *) element)->text, &PyId_text); | 
 | 2294 | } | 
 | 2295 |  | 
 | 2296 | static int | 
 | 2297 | treebuilder_set_element_tail(PyObject *element, PyObject *data) | 
 | 2298 | { | 
 | 2299 |     _Py_IDENTIFIER(tail); | 
 | 2300 |     return treebuilder_set_element_text_or_tail( | 
 | 2301 |         element, data, &((ElementObject *) element)->tail, &PyId_tail); | 
 | 2302 | } | 
 | 2303 |  | 
 | 2304 | static int | 
 | 2305 | treebuilder_add_subelement(PyObject *element, PyObject *child) | 
 | 2306 | { | 
 | 2307 |     _Py_IDENTIFIER(append); | 
 | 2308 |     if (Element_CheckExact(element)) { | 
 | 2309 |         ElementObject *elem = (ElementObject *) element; | 
 | 2310 |         return element_add_subelement(elem, child); | 
 | 2311 |     } | 
 | 2312 |     else { | 
 | 2313 |         PyObject *res; | 
 | 2314 |         res = _PyObject_CallMethodId(element, &PyId_append, "O", child); | 
 | 2315 |         if (res == NULL) | 
 | 2316 |             return -1; | 
 | 2317 |         Py_DECREF(res); | 
 | 2318 |         return 0; | 
 | 2319 |     } | 
 | 2320 | } | 
 | 2321 |  | 
 | 2322 | /* -------------------------------------------------------------------- */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2323 | /* handlers */ | 
 | 2324 |  | 
 | 2325 | LOCAL(PyObject*) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2326 | treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, | 
 | 2327 |                          PyObject* attrib) | 
 | 2328 | { | 
 | 2329 |     PyObject* node; | 
 | 2330 |     PyObject* this; | 
 | 2331 |  | 
 | 2332 |     if (self->data) { | 
 | 2333 |         if (self->this == self->last) { | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2334 |             if (treebuilder_set_element_text(self->last, self->data)) | 
 | 2335 |                 return NULL; | 
 | 2336 |         } | 
 | 2337 |         else { | 
 | 2338 |             if (treebuilder_set_element_tail(self->last, self->data)) | 
 | 2339 |                 return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2340 |         } | 
 | 2341 |         self->data = NULL; | 
 | 2342 |     } | 
 | 2343 |  | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2344 |     if (self->element_factory) { | 
 | 2345 |         node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib); | 
 | 2346 |     } else { | 
 | 2347 |         node = create_new_element(tag, attrib); | 
 | 2348 |     } | 
 | 2349 |     if (!node) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2350 |         return NULL; | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2351 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2352 |  | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2353 |     this = self->this; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2354 |  | 
 | 2355 |     if (this != Py_None) { | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2356 |         if (treebuilder_add_subelement(this, node) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2357 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2358 |     } else { | 
 | 2359 |         if (self->root) { | 
 | 2360 |             PyErr_SetString( | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2361 |                 elementtree_parseerror_obj, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2362 |                 "multiple elements on top level" | 
 | 2363 |                 ); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2364 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2365 |         } | 
 | 2366 |         Py_INCREF(node); | 
 | 2367 |         self->root = node; | 
 | 2368 |     } | 
 | 2369 |  | 
 | 2370 |     if (self->index < PyList_GET_SIZE(self->stack)) { | 
 | 2371 |         if (PyList_SetItem(self->stack, self->index, this) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2372 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2373 |         Py_INCREF(this); | 
 | 2374 |     } else { | 
 | 2375 |         if (PyList_Append(self->stack, this) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2376 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2377 |     } | 
 | 2378 |     self->index++; | 
 | 2379 |  | 
 | 2380 |     Py_DECREF(this); | 
 | 2381 |     Py_INCREF(node); | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2382 |     self->this = node; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2383 |  | 
 | 2384 |     Py_DECREF(self->last); | 
 | 2385 |     Py_INCREF(node); | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2386 |     self->last = node; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2387 |  | 
 | 2388 |     if (self->start_event_obj) { | 
 | 2389 |         PyObject* res; | 
 | 2390 |         PyObject* action = self->start_event_obj; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2391 |         res = PyTuple_Pack(2, action, node); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2392 |         if (res) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2393 |             PyList_Append(self->events, res); | 
 | 2394 |             Py_DECREF(res); | 
 | 2395 |         } else | 
 | 2396 |             PyErr_Clear(); /* FIXME: propagate error */ | 
 | 2397 |     } | 
 | 2398 |  | 
 | 2399 |     return node; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2400 |  | 
 | 2401 |   error: | 
 | 2402 |     Py_DECREF(node); | 
 | 2403 |     return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2404 | } | 
 | 2405 |  | 
 | 2406 | LOCAL(PyObject*) | 
 | 2407 | treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) | 
 | 2408 | { | 
 | 2409 |     if (!self->data) { | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2410 |         if (self->last == Py_None) { | 
| Thomas Wouters | 00ee7ba | 2006-08-21 19:07:27 +0000 | [diff] [blame] | 2411 |             /* ignore calls to data before the first call to start */ | 
 | 2412 |             Py_RETURN_NONE; | 
 | 2413 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2414 |         /* store the first item as is */ | 
 | 2415 |         Py_INCREF(data); self->data = data; | 
 | 2416 |     } else { | 
 | 2417 |         /* more than one item; use a list to collect items */ | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2418 |         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && | 
 | 2419 |             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2420 |             /* XXX this code path unused in Python 3? */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2421 |             /* expat often generates single character data sections; handle | 
 | 2422 |                the most common case by resizing the existing string... */ | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2423 |             Py_ssize_t size = PyBytes_GET_SIZE(self->data); | 
 | 2424 |             if (_PyBytes_Resize(&self->data, size + 1) < 0) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2425 |                 return NULL; | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2426 |             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0]; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2427 |         } else if (PyList_CheckExact(self->data)) { | 
 | 2428 |             if (PyList_Append(self->data, data) < 0) | 
 | 2429 |                 return NULL; | 
 | 2430 |         } else { | 
 | 2431 |             PyObject* list = PyList_New(2); | 
 | 2432 |             if (!list) | 
 | 2433 |                 return NULL; | 
 | 2434 |             PyList_SET_ITEM(list, 0, self->data); | 
 | 2435 |             Py_INCREF(data); PyList_SET_ITEM(list, 1, data); | 
 | 2436 |             self->data = list; | 
 | 2437 |         } | 
 | 2438 |     } | 
 | 2439 |  | 
 | 2440 |     Py_RETURN_NONE; | 
 | 2441 | } | 
 | 2442 |  | 
 | 2443 | LOCAL(PyObject*) | 
 | 2444 | treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) | 
 | 2445 | { | 
 | 2446 |     PyObject* item; | 
 | 2447 |  | 
 | 2448 |     if (self->data) { | 
 | 2449 |         if (self->this == self->last) { | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2450 |             if (treebuilder_set_element_text(self->last, self->data)) | 
 | 2451 |                 return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2452 |         } else { | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2453 |             if (treebuilder_set_element_tail(self->last, self->data)) | 
 | 2454 |                 return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2455 |         } | 
 | 2456 |         self->data = NULL; | 
 | 2457 |     } | 
 | 2458 |  | 
 | 2459 |     if (self->index == 0) { | 
 | 2460 |         PyErr_SetString( | 
 | 2461 |             PyExc_IndexError, | 
 | 2462 |             "pop from empty stack" | 
 | 2463 |             ); | 
 | 2464 |         return NULL; | 
 | 2465 |     } | 
 | 2466 |  | 
 | 2467 |     self->index--; | 
 | 2468 |  | 
 | 2469 |     item = PyList_GET_ITEM(self->stack, self->index); | 
 | 2470 |     Py_INCREF(item); | 
 | 2471 |  | 
 | 2472 |     Py_DECREF(self->last); | 
 | 2473 |  | 
| Antoine Pitrou | ee32931 | 2012-10-04 19:53:29 +0200 | [diff] [blame] | 2474 |     self->last = self->this; | 
 | 2475 |     self->this = item; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2476 |  | 
 | 2477 |     if (self->end_event_obj) { | 
 | 2478 |         PyObject* res; | 
 | 2479 |         PyObject* action = self->end_event_obj; | 
 | 2480 |         PyObject* node = (PyObject*) self->last; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2481 |         res = PyTuple_Pack(2, action, node); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2482 |         if (res) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2483 |             PyList_Append(self->events, res); | 
 | 2484 |             Py_DECREF(res); | 
 | 2485 |         } else | 
 | 2486 |             PyErr_Clear(); /* FIXME: propagate error */ | 
 | 2487 |     } | 
 | 2488 |  | 
 | 2489 |     Py_INCREF(self->last); | 
 | 2490 |     return (PyObject*) self->last; | 
 | 2491 | } | 
 | 2492 |  | 
 | 2493 | LOCAL(void) | 
 | 2494 | treebuilder_handle_namespace(TreeBuilderObject* self, int start, | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2495 |                              PyObject *prefix, PyObject *uri) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2496 | { | 
 | 2497 |     PyObject* res; | 
 | 2498 |     PyObject* action; | 
 | 2499 |     PyObject* parcel; | 
 | 2500 |  | 
 | 2501 |     if (!self->events) | 
 | 2502 |         return; | 
 | 2503 |  | 
 | 2504 |     if (start) { | 
 | 2505 |         if (!self->start_ns_event_obj) | 
 | 2506 |             return; | 
 | 2507 |         action = self->start_ns_event_obj; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2508 |         parcel = Py_BuildValue("OO", prefix, uri); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2509 |         if (!parcel) | 
 | 2510 |             return; | 
 | 2511 |         Py_INCREF(action); | 
 | 2512 |     } else { | 
 | 2513 |         if (!self->end_ns_event_obj) | 
 | 2514 |             return; | 
 | 2515 |         action = self->end_ns_event_obj; | 
 | 2516 |         Py_INCREF(action); | 
 | 2517 |         parcel = Py_None; | 
 | 2518 |         Py_INCREF(parcel); | 
 | 2519 |     } | 
 | 2520 |  | 
 | 2521 |     res = PyTuple_New(2); | 
 | 2522 |  | 
 | 2523 |     if (res) { | 
 | 2524 |         PyTuple_SET_ITEM(res, 0, action); | 
 | 2525 |         PyTuple_SET_ITEM(res, 1, parcel); | 
 | 2526 |         PyList_Append(self->events, res); | 
 | 2527 |         Py_DECREF(res); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2528 |     } | 
 | 2529 |     else { | 
 | 2530 |         Py_DECREF(action); | 
 | 2531 |         Py_DECREF(parcel); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2532 |         PyErr_Clear(); /* FIXME: propagate error */ | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2533 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2534 | } | 
 | 2535 |  | 
 | 2536 | /* -------------------------------------------------------------------- */ | 
 | 2537 | /* methods (in alphabetical order) */ | 
 | 2538 |  | 
 | 2539 | static PyObject* | 
 | 2540 | treebuilder_data(TreeBuilderObject* self, PyObject* args) | 
 | 2541 | { | 
 | 2542 |     PyObject* data; | 
 | 2543 |     if (!PyArg_ParseTuple(args, "O:data", &data)) | 
 | 2544 |         return NULL; | 
 | 2545 |  | 
 | 2546 |     return treebuilder_handle_data(self, data); | 
 | 2547 | } | 
 | 2548 |  | 
 | 2549 | static PyObject* | 
 | 2550 | treebuilder_end(TreeBuilderObject* self, PyObject* args) | 
 | 2551 | { | 
 | 2552 |     PyObject* tag; | 
 | 2553 |     if (!PyArg_ParseTuple(args, "O:end", &tag)) | 
 | 2554 |         return NULL; | 
 | 2555 |  | 
 | 2556 |     return treebuilder_handle_end(self, tag); | 
 | 2557 | } | 
 | 2558 |  | 
 | 2559 | LOCAL(PyObject*) | 
 | 2560 | treebuilder_done(TreeBuilderObject* self) | 
 | 2561 | { | 
 | 2562 |     PyObject* res; | 
 | 2563 |  | 
 | 2564 |     /* FIXME: check stack size? */ | 
 | 2565 |  | 
 | 2566 |     if (self->root) | 
 | 2567 |         res = self->root; | 
 | 2568 |     else | 
 | 2569 |         res = Py_None; | 
 | 2570 |  | 
 | 2571 |     Py_INCREF(res); | 
 | 2572 |     return res; | 
 | 2573 | } | 
 | 2574 |  | 
 | 2575 | static PyObject* | 
 | 2576 | treebuilder_close(TreeBuilderObject* self, PyObject* args) | 
 | 2577 | { | 
 | 2578 |     if (!PyArg_ParseTuple(args, ":close")) | 
 | 2579 |         return NULL; | 
 | 2580 |  | 
 | 2581 |     return treebuilder_done(self); | 
 | 2582 | } | 
 | 2583 |  | 
 | 2584 | static PyObject* | 
 | 2585 | treebuilder_start(TreeBuilderObject* self, PyObject* args) | 
 | 2586 | { | 
 | 2587 |     PyObject* tag; | 
 | 2588 |     PyObject* attrib = Py_None; | 
 | 2589 |     if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) | 
 | 2590 |         return NULL; | 
 | 2591 |  | 
 | 2592 |     return treebuilder_handle_start(self, tag, attrib); | 
 | 2593 | } | 
 | 2594 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2595 | static PyMethodDef treebuilder_methods[] = { | 
 | 2596 |     {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, | 
 | 2597 |     {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, | 
 | 2598 |     {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2599 |     {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, | 
 | 2600 |     {NULL, NULL} | 
 | 2601 | }; | 
 | 2602 |  | 
/* Type object for xml.etree.ElementTree.TreeBuilder.  The type takes part
   in cyclic garbage collection (traverse/clear slots are set), may be
   subclassed (Py_TPFLAGS_BASETYPE) and exposes the methods listed in
   treebuilder_methods.  The initializer is positional: each value must
   stay in the slot named by the comment next to it. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name, tp_basicsize, tp_itemsize */
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)treebuilder_init,                     /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
 | 2644 |  | 
 | 2645 | /* ==================================================================== */ | 
 | 2646 | /* the expat interface */ | 
 | 2647 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2648 | #include "expat.h" | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2649 | #include "pyexpat.h" | 
/* Table of expat entry points; every expat call in this file goes through
   EXPAT(func), which dereferences this pointer.
   NOTE(review): it is not assigned in this part of the file -- presumably
   it is filled in during module initialisation; confirm. */
static struct PyExpat_CAPI *expat_capi;
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite for expat built from Python's object allocator
   functions (malloc, realloc, free -- in that order). */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
 | 2655 |  | 
/* State of an XMLParser object: the expat parser plus the Python objects
   it dispatches to. */
typedef struct {
    PyObject_HEAD

    /* the underlying expat parser */
    XML_Parser parser;

    /* object receiving the parse events; handlers use a direct code path
       when it is an exact TreeBuilder */
    PyObject *target;
    /* mapping consulted by the default handler to resolve entity names */
    PyObject *entity;

    /* cache mapping raw name bytes to decoded names (see makeuniversal) */
    PyObject *names;

    /* callables used when the target is not an exact TreeBuilder.
       NOTE(review): presumably bound methods looked up on the target;
       may be NULL when the target lacks them -- confirm against the
       initialisation code. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
 | 2677 |  | 
/* tentative definition; the initialised type object is not in this part
   of the file */
static PyTypeObject XMLParser_Type;

/* true only for objects whose type is exactly XMLParser_Type (subclass
   instances do not match) */
#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
 | 2681 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2682 | /* helpers */ | 
 | 2683 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2684 | LOCAL(PyObject*) | 
 | 2685 | makeuniversal(XMLParserObject* self, const char* string) | 
 | 2686 | { | 
 | 2687 |     /* convert a UTF-8 tag/attribute name from the expat parser | 
 | 2688 |        to a universal name string */ | 
 | 2689 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2690 |     Py_ssize_t size = (Py_ssize_t) strlen(string); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2691 |     PyObject* key; | 
 | 2692 |     PyObject* value; | 
 | 2693 |  | 
 | 2694 |     /* look the 'raw' name up in the names dictionary */ | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2695 |     key = PyBytes_FromStringAndSize(string, size); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2696 |     if (!key) | 
 | 2697 |         return NULL; | 
 | 2698 |  | 
 | 2699 |     value = PyDict_GetItem(self->names, key); | 
 | 2700 |  | 
 | 2701 |     if (value) { | 
 | 2702 |         Py_INCREF(value); | 
 | 2703 |     } else { | 
 | 2704 |         /* new name.  convert to universal name, and decode as | 
 | 2705 |            necessary */ | 
 | 2706 |  | 
 | 2707 |         PyObject* tag; | 
 | 2708 |         char* p; | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2709 |         Py_ssize_t i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2710 |  | 
 | 2711 |         /* look for namespace separator */ | 
 | 2712 |         for (i = 0; i < size; i++) | 
 | 2713 |             if (string[i] == '}') | 
 | 2714 |                 break; | 
 | 2715 |         if (i != size) { | 
 | 2716 |             /* convert to universal name */ | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2717 |             tag = PyBytes_FromStringAndSize(NULL, size+1); | 
 | 2718 |             p = PyBytes_AS_STRING(tag); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2719 |             p[0] = '{'; | 
 | 2720 |             memcpy(p+1, string, size); | 
 | 2721 |             size++; | 
 | 2722 |         } else { | 
 | 2723 |             /* plain name; use key as tag */ | 
 | 2724 |             Py_INCREF(key); | 
 | 2725 |             tag = key; | 
 | 2726 |         } | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 2727 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2728 |         /* decode universal name */ | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 2729 |         p = PyBytes_AS_STRING(tag); | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 2730 |         value = PyUnicode_DecodeUTF8(p, size, "strict"); | 
 | 2731 |         Py_DECREF(tag); | 
 | 2732 |         if (!value) { | 
 | 2733 |             Py_DECREF(key); | 
 | 2734 |             return NULL; | 
 | 2735 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2736 |  | 
 | 2737 |         /* add to names dictionary */ | 
 | 2738 |         if (PyDict_SetItem(self->names, key, value) < 0) { | 
 | 2739 |             Py_DECREF(key); | 
 | 2740 |             Py_DECREF(value); | 
 | 2741 |             return NULL; | 
 | 2742 |         } | 
 | 2743 |     } | 
 | 2744 |  | 
 | 2745 |     Py_DECREF(key); | 
 | 2746 |     return value; | 
 | 2747 | } | 
 | 2748 |  | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2749 | /* Set the ParseError exception with the given parameters. | 
 | 2750 |  * If message is not NULL, it's used as the error string. Otherwise, the | 
 | 2751 |  * message string is the default for the given error_code. | 
 | 2752 | */ | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2753 | static void | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2754 | expat_set_error(enum XML_Error error_code, int line, int column, char *message) | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2755 | { | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2756 |     PyObject *errmsg, *error, *position, *code; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2757 |  | 
| Victor Stinner | 499dfcf | 2011-03-21 13:26:24 +0100 | [diff] [blame] | 2758 |     errmsg = PyUnicode_FromFormat("%s: line %d, column %d", | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2759 |                 message ? message : EXPAT(ErrorString)(error_code), | 
 | 2760 |                 line, column); | 
| Victor Stinner | 499dfcf | 2011-03-21 13:26:24 +0100 | [diff] [blame] | 2761 |     if (errmsg == NULL) | 
 | 2762 |         return; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2763 |  | 
| Victor Stinner | 499dfcf | 2011-03-21 13:26:24 +0100 | [diff] [blame] | 2764 |     error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg); | 
 | 2765 |     Py_DECREF(errmsg); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2766 |     if (!error) | 
 | 2767 |         return; | 
 | 2768 |  | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2769 |     /* Add code and position attributes */ | 
 | 2770 |     code = PyLong_FromLong((long)error_code); | 
 | 2771 |     if (!code) { | 
 | 2772 |         Py_DECREF(error); | 
 | 2773 |         return; | 
 | 2774 |     } | 
 | 2775 |     if (PyObject_SetAttrString(error, "code", code) == -1) { | 
 | 2776 |         Py_DECREF(error); | 
 | 2777 |         Py_DECREF(code); | 
 | 2778 |         return; | 
 | 2779 |     } | 
 | 2780 |     Py_DECREF(code); | 
 | 2781 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2782 |     position = Py_BuildValue("(ii)", line, column); | 
 | 2783 |     if (!position) { | 
 | 2784 |         Py_DECREF(error); | 
 | 2785 |         return; | 
 | 2786 |     } | 
 | 2787 |     if (PyObject_SetAttrString(error, "position", position) == -1) { | 
 | 2788 |         Py_DECREF(error); | 
 | 2789 |         Py_DECREF(position); | 
 | 2790 |         return; | 
 | 2791 |     } | 
 | 2792 |     Py_DECREF(position); | 
 | 2793 |  | 
 | 2794 |     PyErr_SetObject(elementtree_parseerror_obj, error); | 
 | 2795 |     Py_DECREF(error); | 
 | 2796 | } | 
 | 2797 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2798 | /* -------------------------------------------------------------------- */ | 
 | 2799 | /* handlers */ | 
 | 2800 |  | 
 | 2801 | static void | 
 | 2802 | expat_default_handler(XMLParserObject* self, const XML_Char* data_in, | 
 | 2803 |                       int data_len) | 
 | 2804 | { | 
 | 2805 |     PyObject* key; | 
 | 2806 |     PyObject* value; | 
 | 2807 |     PyObject* res; | 
 | 2808 |  | 
 | 2809 |     if (data_len < 2 || data_in[0] != '&') | 
 | 2810 |         return; | 
 | 2811 |  | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 2812 |     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2813 |     if (!key) | 
 | 2814 |         return; | 
 | 2815 |  | 
 | 2816 |     value = PyDict_GetItem(self->entity, key); | 
 | 2817 |  | 
 | 2818 |     if (value) { | 
 | 2819 |         if (TreeBuilder_CheckExact(self->target)) | 
 | 2820 |             res = treebuilder_handle_data( | 
 | 2821 |                 (TreeBuilderObject*) self->target, value | 
 | 2822 |                 ); | 
 | 2823 |         else if (self->handle_data) | 
 | 2824 |             res = PyObject_CallFunction(self->handle_data, "O", value); | 
 | 2825 |         else | 
 | 2826 |             res = NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2827 |         Py_XDECREF(res); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2828 |     } else if (!PyErr_Occurred()) { | 
 | 2829 |         /* Report the first error, not the last */ | 
| Alexander Belopolsky | e239d23 | 2010-12-08 23:31:48 +0000 | [diff] [blame] | 2830 |         char message[128] = "undefined entity "; | 
 | 2831 |         strncat(message, data_in, data_len < 100?data_len:100); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2832 |         expat_set_error( | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2833 |             XML_ERROR_UNDEFINED_ENTITY, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2834 |             EXPAT(GetErrorLineNumber)(self->parser), | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 2835 |             EXPAT(GetErrorColumnNumber)(self->parser), | 
 | 2836 |             message | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2837 |             ); | 
 | 2838 |     } | 
 | 2839 |  | 
 | 2840 |     Py_DECREF(key); | 
 | 2841 | } | 
 | 2842 |  | 
 | 2843 | static void | 
 | 2844 | expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, | 
 | 2845 |                     const XML_Char **attrib_in) | 
 | 2846 | { | 
 | 2847 |     PyObject* res; | 
 | 2848 |     PyObject* tag; | 
 | 2849 |     PyObject* attrib; | 
 | 2850 |     int ok; | 
 | 2851 |  | 
 | 2852 |     /* tag name */ | 
 | 2853 |     tag = makeuniversal(self, tag_in); | 
 | 2854 |     if (!tag) | 
 | 2855 |         return; /* parser will look for errors */ | 
 | 2856 |  | 
 | 2857 |     /* attributes */ | 
 | 2858 |     if (attrib_in[0]) { | 
 | 2859 |         attrib = PyDict_New(); | 
 | 2860 |         if (!attrib) | 
 | 2861 |             return; | 
 | 2862 |         while (attrib_in[0] && attrib_in[1]) { | 
 | 2863 |             PyObject* key = makeuniversal(self, attrib_in[0]); | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 2864 |             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2865 |             if (!key || !value) { | 
 | 2866 |                 Py_XDECREF(value); | 
 | 2867 |                 Py_XDECREF(key); | 
 | 2868 |                 Py_DECREF(attrib); | 
 | 2869 |                 return; | 
 | 2870 |             } | 
 | 2871 |             ok = PyDict_SetItem(attrib, key, value); | 
 | 2872 |             Py_DECREF(value); | 
 | 2873 |             Py_DECREF(key); | 
 | 2874 |             if (ok < 0) { | 
 | 2875 |                 Py_DECREF(attrib); | 
 | 2876 |                 return; | 
 | 2877 |             } | 
 | 2878 |             attrib_in += 2; | 
 | 2879 |         } | 
 | 2880 |     } else { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 2881 |         /* Pass an empty dictionary on */ | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2882 |         attrib = PyDict_New(); | 
 | 2883 |         if (!attrib) | 
 | 2884 |             return; | 
 | 2885 |     } | 
 | 2886 |  | 
 | 2887 |     if (TreeBuilder_CheckExact(self->target)) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2888 |         /* shortcut */ | 
 | 2889 |         res = treebuilder_handle_start((TreeBuilderObject*) self->target, | 
 | 2890 |                                        tag, attrib); | 
| Eli Bendersky | 48d358b | 2012-05-30 17:57:50 +0300 | [diff] [blame] | 2891 |     } | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2892 |     else if (self->handle_start) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2893 |         res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2894 |     } else | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2895 |         res = NULL; | 
 | 2896 |  | 
 | 2897 |     Py_DECREF(tag); | 
 | 2898 |     Py_DECREF(attrib); | 
 | 2899 |  | 
 | 2900 |     Py_XDECREF(res); | 
 | 2901 | } | 
 | 2902 |  | 
 | 2903 | static void | 
 | 2904 | expat_data_handler(XMLParserObject* self, const XML_Char* data_in, | 
 | 2905 |                    int data_len) | 
 | 2906 | { | 
 | 2907 |     PyObject* data; | 
 | 2908 |     PyObject* res; | 
 | 2909 |  | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 2910 |     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict"); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2911 |     if (!data) | 
 | 2912 |         return; /* parser will look for errors */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2913 |  | 
 | 2914 |     if (TreeBuilder_CheckExact(self->target)) | 
 | 2915 |         /* shortcut */ | 
 | 2916 |         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); | 
 | 2917 |     else if (self->handle_data) | 
 | 2918 |         res = PyObject_CallFunction(self->handle_data, "O", data); | 
 | 2919 |     else | 
 | 2920 |         res = NULL; | 
 | 2921 |  | 
 | 2922 |     Py_DECREF(data); | 
 | 2923 |  | 
 | 2924 |     Py_XDECREF(res); | 
 | 2925 | } | 
 | 2926 |  | 
 | 2927 | static void | 
 | 2928 | expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) | 
 | 2929 | { | 
 | 2930 |     PyObject* tag; | 
 | 2931 |     PyObject* res = NULL; | 
 | 2932 |  | 
 | 2933 |     if (TreeBuilder_CheckExact(self->target)) | 
 | 2934 |         /* shortcut */ | 
 | 2935 |         /* the standard tree builder doesn't look at the end tag */ | 
 | 2936 |         res = treebuilder_handle_end( | 
 | 2937 |             (TreeBuilderObject*) self->target, Py_None | 
 | 2938 |             ); | 
 | 2939 |     else if (self->handle_end) { | 
 | 2940 |         tag = makeuniversal(self, tag_in); | 
 | 2941 |         if (tag) { | 
 | 2942 |             res = PyObject_CallFunction(self->handle_end, "O", tag); | 
 | 2943 |             Py_DECREF(tag); | 
 | 2944 |         } | 
 | 2945 |     } | 
 | 2946 |  | 
 | 2947 |     Py_XDECREF(res); | 
 | 2948 | } | 
 | 2949 |  | 
 | 2950 | static void | 
 | 2951 | expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, | 
 | 2952 |                        const XML_Char *uri) | 
 | 2953 | { | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2954 |     PyObject* sprefix = NULL; | 
 | 2955 |     PyObject* suri = NULL; | 
 | 2956 |  | 
 | 2957 |     suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict"); | 
 | 2958 |     if (!suri) | 
 | 2959 |         return; | 
 | 2960 |  | 
 | 2961 |     if (prefix) | 
 | 2962 |         sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict"); | 
 | 2963 |     else | 
 | 2964 |         sprefix = PyUnicode_FromString(""); | 
 | 2965 |     if (!sprefix) { | 
 | 2966 |         Py_DECREF(suri); | 
 | 2967 |         return; | 
 | 2968 |     } | 
 | 2969 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2970 |     treebuilder_handle_namespace( | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2971 |         (TreeBuilderObject*) self->target, 1, sprefix, suri | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2972 |         ); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 2973 |  | 
 | 2974 |     Py_DECREF(sprefix); | 
 | 2975 |     Py_DECREF(suri); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2976 | } | 
 | 2977 |  | 
 | 2978 | static void | 
 | 2979 | expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) | 
 | 2980 | { | 
 | 2981 |     treebuilder_handle_namespace( | 
 | 2982 |         (TreeBuilderObject*) self->target, 0, NULL, NULL | 
 | 2983 |         ); | 
 | 2984 | } | 
 | 2985 |  | 
 | 2986 | static void | 
 | 2987 | expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) | 
 | 2988 | { | 
 | 2989 |     PyObject* comment; | 
 | 2990 |     PyObject* res; | 
 | 2991 |  | 
 | 2992 |     if (self->handle_comment) { | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 2993 |         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2994 |         if (comment) { | 
 | 2995 |             res = PyObject_CallFunction(self->handle_comment, "O", comment); | 
 | 2996 |             Py_XDECREF(res); | 
 | 2997 |             Py_DECREF(comment); | 
 | 2998 |         } | 
 | 2999 |     } | 
 | 3000 | } | 
 | 3001 |  | 
| Eli Bendersky | 4583990 | 2013-01-13 05:14:47 -0800 | [diff] [blame] | 3002 | static void | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3003 | expat_start_doctype_handler(XMLParserObject *self, | 
 | 3004 |                             const XML_Char *doctype_name, | 
 | 3005 |                             const XML_Char *sysid, | 
 | 3006 |                             const XML_Char *pubid, | 
 | 3007 |                             int has_internal_subset) | 
 | 3008 | { | 
 | 3009 |     PyObject *self_pyobj = (PyObject *)self; | 
 | 3010 |     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj; | 
 | 3011 |     PyObject *parser_doctype = NULL; | 
 | 3012 |     PyObject *res = NULL; | 
 | 3013 |  | 
 | 3014 |     doctype_name_obj = makeuniversal(self, doctype_name); | 
 | 3015 |     if (!doctype_name_obj) | 
 | 3016 |         return; | 
 | 3017 |  | 
 | 3018 |     if (sysid) { | 
 | 3019 |         sysid_obj = makeuniversal(self, sysid); | 
 | 3020 |         if (!sysid_obj) { | 
 | 3021 |             Py_DECREF(doctype_name_obj); | 
 | 3022 |             return; | 
 | 3023 |         } | 
 | 3024 |     } else { | 
 | 3025 |         Py_INCREF(Py_None); | 
 | 3026 |         sysid_obj = Py_None; | 
 | 3027 |     } | 
 | 3028 |  | 
 | 3029 |     if (pubid) { | 
 | 3030 |         pubid_obj = makeuniversal(self, pubid); | 
 | 3031 |         if (!pubid_obj) { | 
 | 3032 |             Py_DECREF(doctype_name_obj); | 
 | 3033 |             Py_DECREF(sysid_obj); | 
 | 3034 |             return; | 
 | 3035 |         } | 
 | 3036 |     } else { | 
 | 3037 |         Py_INCREF(Py_None); | 
 | 3038 |         pubid_obj = Py_None; | 
 | 3039 |     } | 
 | 3040 |  | 
 | 3041 |     /* If the target has a handler for doctype, call it. */ | 
 | 3042 |     if (self->handle_doctype) { | 
 | 3043 |         res = PyObject_CallFunction(self->handle_doctype, "OOO", | 
 | 3044 |                                     doctype_name_obj, pubid_obj, sysid_obj); | 
 | 3045 |         Py_CLEAR(res); | 
 | 3046 |     } | 
 | 3047 |  | 
 | 3048 |     /* Now see if the parser itself has a doctype method. If yes and it's | 
 | 3049 |      * a subclass, call it but warn about deprecation. If it's not a subclass | 
 | 3050 |      * (i.e. vanilla XMLParser), do nothing. | 
 | 3051 |      */ | 
 | 3052 |     parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); | 
 | 3053 |     if (parser_doctype) { | 
 | 3054 |         if (!XMLParser_CheckExact(self_pyobj)) { | 
 | 3055 |             if (PyErr_WarnEx(PyExc_DeprecationWarning, | 
 | 3056 |                             "This method of XMLParser is deprecated.  Define" | 
 | 3057 |                             " doctype() method on the TreeBuilder target.", | 
 | 3058 |                             1) < 0) { | 
 | 3059 |                 goto clear; | 
 | 3060 |             } | 
 | 3061 |             res = PyObject_CallFunction(parser_doctype, "OOO", | 
 | 3062 |                                         doctype_name_obj, pubid_obj, sysid_obj); | 
 | 3063 |             Py_CLEAR(res); | 
 | 3064 |         } | 
 | 3065 |     } | 
 | 3066 |  | 
 | 3067 | clear: | 
 | 3068 |     Py_XDECREF(parser_doctype); | 
 | 3069 |     Py_DECREF(doctype_name_obj); | 
 | 3070 |     Py_DECREF(pubid_obj); | 
 | 3071 |     Py_DECREF(sysid_obj); | 
 | 3072 | } | 
 | 3073 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3074 | static void | 
 | 3075 | expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, | 
 | 3076 |                  const XML_Char* data_in) | 
 | 3077 | { | 
 | 3078 |     PyObject* target; | 
 | 3079 |     PyObject* data; | 
 | 3080 |     PyObject* res; | 
 | 3081 |  | 
 | 3082 |     if (self->handle_pi) { | 
| Neal Norwitz | 0269b91 | 2007-08-08 06:56:02 +0000 | [diff] [blame] | 3083 |         target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); | 
 | 3084 |         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3085 |         if (target && data) { | 
 | 3086 |             res = PyObject_CallFunction(self->handle_pi, "OO", target, data); | 
 | 3087 |             Py_XDECREF(res); | 
 | 3088 |             Py_DECREF(data); | 
 | 3089 |             Py_DECREF(target); | 
 | 3090 |         } else { | 
 | 3091 |             Py_XDECREF(data); | 
 | 3092 |             Py_XDECREF(target); | 
 | 3093 |         } | 
 | 3094 |     } | 
 | 3095 | } | 
 | 3096 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3097 | static int | 
 | 3098 | expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, | 
 | 3099 |                                XML_Encoding *info) | 
 | 3100 | { | 
 | 3101 |     PyObject* u; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3102 |     unsigned char s[256]; | 
 | 3103 |     int i; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 3104 |     void *data; | 
 | 3105 |     unsigned int kind; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3106 |  | 
 | 3107 |     memset(info, 0, sizeof(XML_Encoding)); | 
 | 3108 |  | 
 | 3109 |     for (i = 0; i < 256; i++) | 
 | 3110 |         s[i] = i; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 3111 |  | 
| Fredrik Lundh | c338999 | 2005-12-25 11:40:19 +0000 | [diff] [blame] | 3112 |     u = PyUnicode_Decode((char*) s, 256, name, "replace"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3113 |     if (!u) | 
 | 3114 |         return XML_STATUS_ERROR; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 3115 |     if (PyUnicode_READY(u)) | 
 | 3116 |         return XML_STATUS_ERROR; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3117 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 3118 |     if (PyUnicode_GET_LENGTH(u) != 256) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3119 |         Py_DECREF(u); | 
 | 3120 |         return XML_STATUS_ERROR; | 
 | 3121 |     } | 
 | 3122 |  | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 3123 |     kind = PyUnicode_KIND(u); | 
 | 3124 |     data = PyUnicode_DATA(u); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3125 |     for (i = 0; i < 256; i++) { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 3126 |         Py_UCS4 ch = PyUnicode_READ(kind, data, i); | 
 | 3127 |         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER) | 
 | 3128 |             info->map[i] = ch; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3129 |         else | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3130 |             info->map[i] = -1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3131 |     } | 
 | 3132 |  | 
 | 3133 |     Py_DECREF(u); | 
 | 3134 |  | 
 | 3135 |     return XML_STATUS_OK; | 
 | 3136 | } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3137 |  | 
 | 3138 | /* -------------------------------------------------------------------- */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3139 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3140 | static PyObject * | 
 | 3141 | xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3142 | { | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3143 |     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); | 
 | 3144 |     if (self) { | 
 | 3145 |         self->parser = NULL; | 
 | 3146 |         self->target = self->entity = self->names = NULL; | 
 | 3147 |         self->handle_start = self->handle_data = self->handle_end = NULL; | 
 | 3148 |         self->handle_comment = self->handle_pi = self->handle_close = NULL; | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3149 |         self->handle_doctype = NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3150 |     } | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3151 |     return (PyObject *)self; | 
 | 3152 | } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3153 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3154 | static int | 
 | 3155 | xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 3156 | { | 
 | 3157 |     XMLParserObject *self_xp = (XMLParserObject *)self; | 
 | 3158 |     PyObject *target = NULL, *html = NULL; | 
 | 3159 |     char *encoding = NULL; | 
| Eli Bendersky | c68e136 | 2012-06-03 06:09:42 +0300 | [diff] [blame] | 3160 |     static char *kwlist[] = {"html", "target", "encoding", 0}; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3161 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3162 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist, | 
 | 3163 |                                      &html, &target, &encoding)) { | 
 | 3164 |         return -1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3165 |     } | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 3166 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3167 |     self_xp->entity = PyDict_New(); | 
 | 3168 |     if (!self_xp->entity) | 
 | 3169 |         return -1; | 
 | 3170 |  | 
 | 3171 |     self_xp->names = PyDict_New(); | 
 | 3172 |     if (!self_xp->names) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3173 |         Py_CLEAR(self_xp->entity); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3174 |         return -1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3175 |     } | 
 | 3176 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3177 |     self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); | 
 | 3178 |     if (!self_xp->parser) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3179 |         Py_CLEAR(self_xp->entity); | 
 | 3180 |         Py_CLEAR(self_xp->names); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3181 |         PyErr_NoMemory(); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3182 |         return -1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3183 |     } | 
 | 3184 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3185 |     if (target) { | 
 | 3186 |         Py_INCREF(target); | 
 | 3187 |     } else { | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 3188 |         target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3189 |         if (!target) { | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3190 |             Py_CLEAR(self_xp->entity); | 
 | 3191 |             Py_CLEAR(self_xp->names); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3192 |             EXPAT(ParserFree)(self_xp->parser); | 
 | 3193 |             return -1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3194 |         } | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3195 |     } | 
 | 3196 |     self_xp->target = target; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3197 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3198 |     self_xp->handle_start = PyObject_GetAttrString(target, "start"); | 
 | 3199 |     self_xp->handle_data = PyObject_GetAttrString(target, "data"); | 
 | 3200 |     self_xp->handle_end = PyObject_GetAttrString(target, "end"); | 
 | 3201 |     self_xp->handle_comment = PyObject_GetAttrString(target, "comment"); | 
 | 3202 |     self_xp->handle_pi = PyObject_GetAttrString(target, "pi"); | 
 | 3203 |     self_xp->handle_close = PyObject_GetAttrString(target, "close"); | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3204 |     self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3205 |  | 
 | 3206 |     PyErr_Clear(); | 
| Eli Bendersky | 4583990 | 2013-01-13 05:14:47 -0800 | [diff] [blame] | 3207 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3208 |     /* configure parser */ | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3209 |     EXPAT(SetUserData)(self_xp->parser, self_xp); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3210 |     EXPAT(SetElementHandler)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3211 |         self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3212 |         (XML_StartElementHandler) expat_start_handler, | 
 | 3213 |         (XML_EndElementHandler) expat_end_handler | 
 | 3214 |         ); | 
 | 3215 |     EXPAT(SetDefaultHandlerExpand)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3216 |         self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3217 |         (XML_DefaultHandler) expat_default_handler | 
 | 3218 |         ); | 
 | 3219 |     EXPAT(SetCharacterDataHandler)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3220 |         self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3221 |         (XML_CharacterDataHandler) expat_data_handler | 
 | 3222 |         ); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3223 |     if (self_xp->handle_comment) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3224 |         EXPAT(SetCommentHandler)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3225 |             self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3226 |             (XML_CommentHandler) expat_comment_handler | 
 | 3227 |             ); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3228 |     if (self_xp->handle_pi) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3229 |         EXPAT(SetProcessingInstructionHandler)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3230 |             self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3231 |             (XML_ProcessingInstructionHandler) expat_pi_handler | 
 | 3232 |             ); | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3233 |     EXPAT(SetStartDoctypeDeclHandler)( | 
 | 3234 |         self_xp->parser, | 
 | 3235 |         (XML_StartDoctypeDeclHandler) expat_start_doctype_handler | 
 | 3236 |         ); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3237 |     EXPAT(SetUnknownEncodingHandler)( | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3238 |         self_xp->parser, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3239 |         (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL | 
 | 3240 |         ); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3241 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3242 |     return 0; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3243 | } | 
 | 3244 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3245 | static int | 
 | 3246 | xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) | 
 | 3247 | { | 
 | 3248 |     Py_VISIT(self->handle_close); | 
 | 3249 |     Py_VISIT(self->handle_pi); | 
 | 3250 |     Py_VISIT(self->handle_comment); | 
 | 3251 |     Py_VISIT(self->handle_end); | 
 | 3252 |     Py_VISIT(self->handle_data); | 
 | 3253 |     Py_VISIT(self->handle_start); | 
 | 3254 |  | 
 | 3255 |     Py_VISIT(self->target); | 
 | 3256 |     Py_VISIT(self->entity); | 
 | 3257 |     Py_VISIT(self->names); | 
 | 3258 |  | 
 | 3259 |     return 0; | 
 | 3260 | } | 
 | 3261 |  | 
 | 3262 | static int | 
 | 3263 | xmlparser_gc_clear(XMLParserObject *self) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3264 | { | 
 | 3265 |     EXPAT(ParserFree)(self->parser); | 
 | 3266 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3267 |     Py_CLEAR(self->handle_close); | 
 | 3268 |     Py_CLEAR(self->handle_pi); | 
 | 3269 |     Py_CLEAR(self->handle_comment); | 
 | 3270 |     Py_CLEAR(self->handle_end); | 
 | 3271 |     Py_CLEAR(self->handle_data); | 
 | 3272 |     Py_CLEAR(self->handle_start); | 
 | 3273 |     Py_CLEAR(self->handle_doctype); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3274 |  | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3275 |     Py_CLEAR(self->target); | 
 | 3276 |     Py_CLEAR(self->entity); | 
 | 3277 |     Py_CLEAR(self->names); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3278 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3279 |     return 0; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3280 | } | 
 | 3281 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3282 | static void | 
 | 3283 | xmlparser_dealloc(XMLParserObject* self) | 
 | 3284 | { | 
 | 3285 |     PyObject_GC_UnTrack(self); | 
 | 3286 |     xmlparser_gc_clear(self); | 
 | 3287 |     Py_TYPE(self)->tp_free((PyObject *)self); | 
 | 3288 | } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3289 |  | 
 | 3290 | LOCAL(PyObject*) | 
 | 3291 | expat_parse(XMLParserObject* self, char* data, int data_len, int final) | 
 | 3292 | { | 
 | 3293 |     int ok; | 
 | 3294 |  | 
 | 3295 |     ok = EXPAT(Parse)(self->parser, data, data_len, final); | 
 | 3296 |  | 
 | 3297 |     if (PyErr_Occurred()) | 
 | 3298 |         return NULL; | 
 | 3299 |  | 
 | 3300 |     if (!ok) { | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3301 |         expat_set_error( | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 3302 |             EXPAT(GetErrorCode)(self->parser), | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3303 |             EXPAT(GetErrorLineNumber)(self->parser), | 
| Eli Bendersky | 5b77d81 | 2012-03-16 08:20:05 +0200 | [diff] [blame] | 3304 |             EXPAT(GetErrorColumnNumber)(self->parser), | 
 | 3305 |             NULL | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3306 |             ); | 
 | 3307 |         return NULL; | 
 | 3308 |     } | 
 | 3309 |  | 
 | 3310 |     Py_RETURN_NONE; | 
 | 3311 | } | 
 | 3312 |  | 
 | 3313 | static PyObject* | 
 | 3314 | xmlparser_close(XMLParserObject* self, PyObject* args) | 
 | 3315 | { | 
 | 3316 |     /* end feeding data to parser */ | 
 | 3317 |  | 
 | 3318 |     PyObject* res; | 
 | 3319 |     if (!PyArg_ParseTuple(args, ":close")) | 
 | 3320 |         return NULL; | 
 | 3321 |  | 
 | 3322 |     res = expat_parse(self, "", 0, 1); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3323 |     if (!res) | 
 | 3324 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3325 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3326 |     if (TreeBuilder_CheckExact(self->target)) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3327 |         Py_DECREF(res); | 
 | 3328 |         return treebuilder_done((TreeBuilderObject*) self->target); | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3329 |     } if (self->handle_close) { | 
 | 3330 |         Py_DECREF(res); | 
 | 3331 |         return PyObject_CallFunction(self->handle_close, ""); | 
 | 3332 |     } else | 
 | 3333 |         return res; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3334 | } | 
 | 3335 |  | 
 | 3336 | static PyObject* | 
 | 3337 | xmlparser_feed(XMLParserObject* self, PyObject* args) | 
 | 3338 | { | 
 | 3339 |     /* feed data to parser */ | 
 | 3340 |  | 
 | 3341 |     char* data; | 
 | 3342 |     int data_len; | 
 | 3343 |     if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) | 
 | 3344 |         return NULL; | 
 | 3345 |  | 
 | 3346 |     return expat_parse(self, data, data_len, 0); | 
 | 3347 | } | 
 | 3348 |  | 
 | 3349 | static PyObject* | 
 | 3350 | xmlparser_parse(XMLParserObject* self, PyObject* args) | 
 | 3351 | { | 
 | 3352 |     /* (internal) parse until end of input stream */ | 
 | 3353 |  | 
 | 3354 |     PyObject* reader; | 
 | 3355 |     PyObject* buffer; | 
| Eli Bendersky | f996e77 | 2012-03-16 05:53:30 +0200 | [diff] [blame] | 3356 |     PyObject* temp; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3357 |     PyObject* res; | 
 | 3358 |  | 
 | 3359 |     PyObject* fileobj; | 
 | 3360 |     if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) | 
 | 3361 |         return NULL; | 
 | 3362 |  | 
 | 3363 |     reader = PyObject_GetAttrString(fileobj, "read"); | 
 | 3364 |     if (!reader) | 
 | 3365 |         return NULL; | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 3366 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3367 |     /* read from open file object */ | 
 | 3368 |     for (;;) { | 
 | 3369 |  | 
 | 3370 |         buffer = PyObject_CallFunction(reader, "i", 64*1024); | 
 | 3371 |  | 
 | 3372 |         if (!buffer) { | 
 | 3373 |             /* read failed (e.g. due to KeyboardInterrupt) */ | 
 | 3374 |             Py_DECREF(reader); | 
 | 3375 |             return NULL; | 
 | 3376 |         } | 
 | 3377 |  | 
| Eli Bendersky | f996e77 | 2012-03-16 05:53:30 +0200 | [diff] [blame] | 3378 |         if (PyUnicode_CheckExact(buffer)) { | 
 | 3379 |             /* A unicode object is encoded into bytes using UTF-8 */ | 
 | 3380 |             if (PyUnicode_GET_SIZE(buffer) == 0) { | 
 | 3381 |                 Py_DECREF(buffer); | 
 | 3382 |                 break; | 
 | 3383 |             } | 
 | 3384 |             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3385 |             Py_DECREF(buffer); | 
| Eli Bendersky | f996e77 | 2012-03-16 05:53:30 +0200 | [diff] [blame] | 3386 |             if (!temp) { | 
 | 3387 |                 /* Propagate exception from PyUnicode_AsEncodedString */ | 
| Eli Bendersky | f996e77 | 2012-03-16 05:53:30 +0200 | [diff] [blame] | 3388 |                 Py_DECREF(reader); | 
 | 3389 |                 return NULL; | 
 | 3390 |             } | 
| Eli Bendersky | f996e77 | 2012-03-16 05:53:30 +0200 | [diff] [blame] | 3391 |             buffer = temp; | 
 | 3392 |         } | 
 | 3393 |         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3394 |             Py_DECREF(buffer); | 
 | 3395 |             break; | 
 | 3396 |         } | 
 | 3397 |  | 
 | 3398 |         res = expat_parse( | 
| Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame] | 3399 |             self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0 | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3400 |             ); | 
 | 3401 |  | 
 | 3402 |         Py_DECREF(buffer); | 
 | 3403 |  | 
 | 3404 |         if (!res) { | 
 | 3405 |             Py_DECREF(reader); | 
 | 3406 |             return NULL; | 
 | 3407 |         } | 
 | 3408 |         Py_DECREF(res); | 
 | 3409 |  | 
 | 3410 |     } | 
 | 3411 |  | 
 | 3412 |     Py_DECREF(reader); | 
 | 3413 |  | 
 | 3414 |     res = expat_parse(self, "", 0, 1); | 
 | 3415 |  | 
 | 3416 |     if (res && TreeBuilder_CheckExact(self->target)) { | 
 | 3417 |         Py_DECREF(res); | 
 | 3418 |         return treebuilder_done((TreeBuilderObject*) self->target); | 
 | 3419 |     } | 
 | 3420 |  | 
 | 3421 |     return res; | 
 | 3422 | } | 
 | 3423 |  | 
 | 3424 | static PyObject* | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3425 | xmlparser_doctype(XMLParserObject *self, PyObject *args) | 
 | 3426 | { | 
 | 3427 |     Py_RETURN_NONE; | 
 | 3428 | } | 
 | 3429 |  | 
 | 3430 | static PyObject* | 
 | 3431 | xmlparser_setevents(XMLParserObject *self, PyObject* args) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3432 | { | 
 | 3433 |     /* activate element event reporting */ | 
 | 3434 |  | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 3435 |     Py_ssize_t i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3436 |     TreeBuilderObject* target; | 
 | 3437 |  | 
 | 3438 |     PyObject* events; /* event collector */ | 
 | 3439 |     PyObject* event_set = Py_None; | 
 | 3440 |     if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events, | 
 | 3441 |                           &event_set)) | 
 | 3442 |         return NULL; | 
 | 3443 |  | 
 | 3444 |     if (!TreeBuilder_CheckExact(self->target)) { | 
 | 3445 |         PyErr_SetString( | 
 | 3446 |             PyExc_TypeError, | 
| Florent Xicluna | a72a98f | 2012-02-13 11:03:30 +0100 | [diff] [blame] | 3447 |             "event handling only supported for ElementTree.TreeBuilder " | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3448 |             "targets" | 
 | 3449 |             ); | 
 | 3450 |         return NULL; | 
 | 3451 |     } | 
 | 3452 |  | 
 | 3453 |     target = (TreeBuilderObject*) self->target; | 
 | 3454 |  | 
 | 3455 |     Py_INCREF(events); | 
 | 3456 |     Py_XDECREF(target->events); | 
 | 3457 |     target->events = events; | 
 | 3458 |  | 
 | 3459 |     /* clear out existing events */ | 
| Antoine Pitrou | c194884 | 2012-10-01 23:40:37 +0200 | [diff] [blame] | 3460 |     Py_CLEAR(target->start_event_obj); | 
 | 3461 |     Py_CLEAR(target->end_event_obj); | 
 | 3462 |     Py_CLEAR(target->start_ns_event_obj); | 
 | 3463 |     Py_CLEAR(target->end_ns_event_obj); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3464 |  | 
 | 3465 |     if (event_set == Py_None) { | 
 | 3466 |         /* default is "end" only */ | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3467 |         target->end_event_obj = PyUnicode_FromString("end"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3468 |         Py_RETURN_NONE; | 
 | 3469 |     } | 
 | 3470 |  | 
 | 3471 |     if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ | 
 | 3472 |         goto error; | 
 | 3473 |  | 
 | 3474 |     for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { | 
 | 3475 |         PyObject* item = PyTuple_GET_ITEM(event_set, i); | 
 | 3476 |         char* event; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3477 |         if (PyUnicode_Check(item)) { | 
 | 3478 |             event = _PyUnicode_AsString(item); | 
| Victor Stinner | 0477bf3 | 2010-03-22 12:11:44 +0000 | [diff] [blame] | 3479 |             if (event == NULL) | 
 | 3480 |                 goto error; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3481 |         } else if (PyBytes_Check(item)) | 
 | 3482 |             event = PyBytes_AS_STRING(item); | 
 | 3483 |         else { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3484 |             goto error; | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3485 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3486 |         if (strcmp(event, "start") == 0) { | 
 | 3487 |             Py_INCREF(item); | 
 | 3488 |             target->start_event_obj = item; | 
 | 3489 |         } else if (strcmp(event, "end") == 0) { | 
 | 3490 |             Py_INCREF(item); | 
 | 3491 |             Py_XDECREF(target->end_event_obj); | 
 | 3492 |             target->end_event_obj = item; | 
 | 3493 |         } else if (strcmp(event, "start-ns") == 0) { | 
 | 3494 |             Py_INCREF(item); | 
 | 3495 |             Py_XDECREF(target->start_ns_event_obj); | 
 | 3496 |             target->start_ns_event_obj = item; | 
 | 3497 |             EXPAT(SetNamespaceDeclHandler)( | 
 | 3498 |                 self->parser, | 
 | 3499 |                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | 
 | 3500 |                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler | 
 | 3501 |                 ); | 
 | 3502 |         } else if (strcmp(event, "end-ns") == 0) { | 
 | 3503 |             Py_INCREF(item); | 
 | 3504 |             Py_XDECREF(target->end_ns_event_obj); | 
 | 3505 |             target->end_ns_event_obj = item; | 
 | 3506 |             EXPAT(SetNamespaceDeclHandler)( | 
 | 3507 |                 self->parser, | 
 | 3508 |                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | 
 | 3509 |                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler | 
 | 3510 |                 ); | 
 | 3511 |         } else { | 
 | 3512 |             PyErr_Format( | 
 | 3513 |                 PyExc_ValueError, | 
 | 3514 |                 "unknown event '%s'", event | 
 | 3515 |                 ); | 
 | 3516 |             return NULL; | 
 | 3517 |         } | 
 | 3518 |     } | 
 | 3519 |  | 
 | 3520 |     Py_RETURN_NONE; | 
 | 3521 |  | 
 | 3522 |   error: | 
 | 3523 |     PyErr_SetString( | 
 | 3524 |         PyExc_TypeError, | 
 | 3525 |         "invalid event tuple" | 
 | 3526 |         ); | 
 | 3527 |     return NULL; | 
 | 3528 | } | 
 | 3529 |  | 
 | 3530 | static PyMethodDef xmlparser_methods[] = { | 
 | 3531 |     {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, | 
 | 3532 |     {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, | 
 | 3533 |     {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, | 
 | 3534 |     {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, | 
| Eli Bendersky | 2b6b73e | 2012-06-01 11:32:34 +0300 | [diff] [blame] | 3535 |     {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3536 |     {NULL, NULL} | 
 | 3537 | }; | 
 | 3538 |  | 
| Victor Stinner | bfc7bf0 | 2011-03-21 13:23:42 +0100 | [diff] [blame] | 3539 | static PyObject* | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 3540 | xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3541 | { | 
| Alexander Belopolsky | e239d23 | 2010-12-08 23:31:48 +0000 | [diff] [blame] | 3542 |     if (PyUnicode_Check(nameobj)) { | 
 | 3543 |         PyObject* res; | 
 | 3544 |         if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0) | 
 | 3545 |             res = self->entity; | 
 | 3546 |         else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0) | 
 | 3547 |             res = self->target; | 
 | 3548 |         else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) { | 
 | 3549 |             return PyUnicode_FromFormat( | 
 | 3550 |                 "Expat %d.%d.%d", XML_MAJOR_VERSION, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3551 |                 XML_MINOR_VERSION, XML_MICRO_VERSION); | 
| Alexander Belopolsky | e239d23 | 2010-12-08 23:31:48 +0000 | [diff] [blame] | 3552 |         } | 
 | 3553 |         else | 
 | 3554 |             goto generic; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3555 |  | 
| Alexander Belopolsky | e239d23 | 2010-12-08 23:31:48 +0000 | [diff] [blame] | 3556 |         Py_INCREF(res); | 
 | 3557 |         return res; | 
 | 3558 |     } | 
 | 3559 |   generic: | 
 | 3560 |     return PyObject_GenericGetAttr((PyObject*) self, nameobj); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3561 | } | 
 | 3562 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 3563 | static PyTypeObject XMLParser_Type = { | 
| Martin v. Löwis | 9f2e346 | 2007-07-21 17:22:18 +0000 | [diff] [blame] | 3564 |     PyVarObject_HEAD_INIT(NULL, 0) | 
| Eli Bendersky | 698bdb2 | 2013-01-10 06:01:06 -0800 | [diff] [blame] | 3565 |     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3566 |     /* methods */ | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3567 |     (destructor)xmlparser_dealloc,                  /* tp_dealloc */ | 
 | 3568 |     0,                                              /* tp_print */ | 
 | 3569 |     0,                                              /* tp_getattr */ | 
 | 3570 |     0,                                              /* tp_setattr */ | 
 | 3571 |     0,                                              /* tp_reserved */ | 
 | 3572 |     0,                                              /* tp_repr */ | 
 | 3573 |     0,                                              /* tp_as_number */ | 
 | 3574 |     0,                                              /* tp_as_sequence */ | 
 | 3575 |     0,                                              /* tp_as_mapping */ | 
 | 3576 |     0,                                              /* tp_hash */ | 
 | 3577 |     0,                                              /* tp_call */ | 
 | 3578 |     0,                                              /* tp_str */ | 
 | 3579 |     (getattrofunc)xmlparser_getattro,               /* tp_getattro */ | 
 | 3580 |     0,                                              /* tp_setattro */ | 
 | 3581 |     0,                                              /* tp_as_buffer */ | 
 | 3582 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, | 
 | 3583 |                                                     /* tp_flags */ | 
 | 3584 |     0,                                              /* tp_doc */ | 
 | 3585 |     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */ | 
 | 3586 |     (inquiry)xmlparser_gc_clear,                    /* tp_clear */ | 
 | 3587 |     0,                                              /* tp_richcompare */ | 
 | 3588 |     0,                                              /* tp_weaklistoffset */ | 
 | 3589 |     0,                                              /* tp_iter */ | 
 | 3590 |     0,                                              /* tp_iternext */ | 
 | 3591 |     xmlparser_methods,                              /* tp_methods */ | 
 | 3592 |     0,                                              /* tp_members */ | 
 | 3593 |     0,                                              /* tp_getset */ | 
 | 3594 |     0,                                              /* tp_base */ | 
 | 3595 |     0,                                              /* tp_dict */ | 
 | 3596 |     0,                                              /* tp_descr_get */ | 
 | 3597 |     0,                                              /* tp_descr_set */ | 
 | 3598 |     0,                                              /* tp_dictoffset */ | 
 | 3599 |     (initproc)xmlparser_init,                       /* tp_init */ | 
 | 3600 |     PyType_GenericAlloc,                            /* tp_alloc */ | 
 | 3601 |     xmlparser_new,                                  /* tp_new */ | 
 | 3602 |     0,                                              /* tp_free */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3603 | }; | 
 | 3604 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3605 | /* ==================================================================== */ | 
 | 3606 | /* python module interface */ | 
 | 3607 |  | 
 | 3608 | static PyMethodDef _functions[] = { | 
| Eli Bendersky | a873690 | 2013-01-05 06:26:39 -0800 | [diff] [blame] | 3609 |     {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS}, | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3610 |     {NULL, NULL} | 
 | 3611 | }; | 
 | 3612 |  | 
| Martin v. Löwis | 1a21451 | 2008-06-11 05:26:20 +0000 | [diff] [blame] | 3613 |  | 
 | 3614 | static struct PyModuleDef _elementtreemodule = { | 
| Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 3615 |         PyModuleDef_HEAD_INIT, | 
 | 3616 |         "_elementtree", | 
 | 3617 |         NULL, | 
 | 3618 |         -1, | 
 | 3619 |         _functions, | 
 | 3620 |         NULL, | 
 | 3621 |         NULL, | 
 | 3622 |         NULL, | 
 | 3623 |         NULL | 
| Martin v. Löwis | 1a21451 | 2008-06-11 05:26:20 +0000 | [diff] [blame] | 3624 | }; | 
 | 3625 |  | 
| Neal Norwitz | f6657e6 | 2006-12-28 04:47:50 +0000 | [diff] [blame] | 3626 | PyMODINIT_FUNC | 
| Martin v. Löwis | 1a21451 | 2008-06-11 05:26:20 +0000 | [diff] [blame] | 3627 | PyInit__elementtree(void) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3628 | { | 
| Eli Bendersky | 64d11e6 | 2012-06-15 07:42:50 +0300 | [diff] [blame] | 3629 |     PyObject *m, *temp; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3630 |  | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 3631 |     /* Initialize object types */ | 
 | 3632 |     if (PyType_Ready(&TreeBuilder_Type) < 0) | 
| Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 3633 |         return NULL; | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 3634 |     if (PyType_Ready(&Element_Type) < 0) | 
| Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 3635 |         return NULL; | 
| Amaury Forgeot d'Arc | ba4105c | 2008-07-02 21:41:01 +0000 | [diff] [blame] | 3636 |     if (PyType_Ready(&XMLParser_Type) < 0) | 
| Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 3637 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3638 |  | 
| Martin v. Löwis | 1a21451 | 2008-06-11 05:26:20 +0000 | [diff] [blame] | 3639 |     m = PyModule_Create(&_elementtreemodule); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 3640 |     if (!m) | 
| Martin v. Löwis | 1a21451 | 2008-06-11 05:26:20 +0000 | [diff] [blame] | 3641 |         return NULL; | 
 | 3642 |  | 
| Eli Bendersky | 828efde | 2012-04-05 05:40:58 +0300 | [diff] [blame] | 3643 |     if (!(temp = PyImport_ImportModule("copy"))) | 
 | 3644 |         return NULL; | 
 | 3645 |     elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); | 
 | 3646 |     Py_XDECREF(temp); | 
 | 3647 |  | 
 | 3648 |     if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) | 
 | 3649 |         return NULL; | 
 | 3650 |  | 
| Eli Bendersky | 20d4174 | 2012-06-01 09:48:37 +0300 | [diff] [blame] | 3651 |     /* link against pyexpat */ | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3652 |     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); | 
 | 3653 |     if (expat_capi) { | 
 | 3654 |         /* check that it's usable */ | 
 | 3655 |         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || | 
 | 3656 |             expat_capi->size < sizeof(struct PyExpat_CAPI) || | 
 | 3657 |             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || | 
 | 3658 |             expat_capi->MINOR_VERSION != XML_MINOR_VERSION || | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3659 |             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) { | 
| Eli Bendersky | ef391ac | 2012-07-21 20:28:46 +0300 | [diff] [blame] | 3660 |             PyErr_SetString(PyExc_ImportError, | 
 | 3661 |                             "pyexpat version is incompatible"); | 
 | 3662 |             return NULL; | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3663 |         } | 
| Eli Bendersky | ef391ac | 2012-07-21 20:28:46 +0300 | [diff] [blame] | 3664 |     } else { | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3665 |         return NULL; | 
| Eli Bendersky | ef391ac | 2012-07-21 20:28:46 +0300 | [diff] [blame] | 3666 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3667 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3668 |     elementtree_parseerror_obj = PyErr_NewException( | 
| Florent Xicluna | a72a98f | 2012-02-13 11:03:30 +0100 | [diff] [blame] | 3669 |         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3670 |         ); | 
 | 3671 |     Py_INCREF(elementtree_parseerror_obj); | 
 | 3672 |     PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); | 
 | 3673 |  | 
| Eli Bendersky | 092af1f | 2012-03-04 07:14:03 +0200 | [diff] [blame] | 3674 |     Py_INCREF((PyObject *)&Element_Type); | 
 | 3675 |     PyModule_AddObject(m, "Element", (PyObject *)&Element_Type); | 
 | 3676 |  | 
| Eli Bendersky | 58d548d | 2012-05-29 15:45:16 +0300 | [diff] [blame] | 3677 |     Py_INCREF((PyObject *)&TreeBuilder_Type); | 
 | 3678 |     PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type); | 
 | 3679 |  | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3680 |     Py_INCREF((PyObject *)&XMLParser_Type); | 
 | 3681 |     PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type); | 
| Eli Bendersky | 52467b1 | 2012-06-01 07:13:08 +0300 | [diff] [blame] | 3682 |  | 
| Florent Xicluna | f15351d | 2010-03-13 23:24:31 +0000 | [diff] [blame] | 3683 |     return m; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 3684 | } |