| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * ElementTree | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 3 |  * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 4 |  * | 
 | 5 |  * elementtree accelerator | 
 | 6 |  * | 
 | 7 |  * History: | 
 | 8 |  * 1999-06-20 fl  created (as part of sgmlop) | 
 | 9 |  * 2001-05-29 fl  effdom edition | 
 | 10 |  * 2001-06-05 fl  backported to unix; fixed bogus free in clear | 
 | 11 |  * 2001-07-10 fl  added findall helper | 
 | 12 |  * 2003-02-27 fl  elementtree edition (alpha) | 
 | 13 |  * 2004-06-03 fl  updates for elementtree 1.2 | 
 | 14 |  * 2005-01-05 fl  added universal name cache, Element/SubElement factories | 
 | 15 |  * 2005-01-06 fl  moved python helpers into C module; removed 1.5.2 support | 
 | 16 |  * 2005-01-07 fl  added 2.1 support; work around broken __copy__ in 2.3 | 
 | 17 |  * 2005-01-08 fl  added makeelement method; fixed path support | 
 | 18 |  * 2005-01-10 fl  optimized memory usage | 
 | 19 |  * 2005-01-11 fl  first public release (cElementTree 0.8) | 
 | 20 |  * 2005-01-12 fl  split element object into base and extras | 
 | 21 |  * 2005-01-13 fl  use tagged pointers for tail/text (cElementTree 0.9) | 
 | 22 |  * 2005-01-17 fl  added treebuilder close method | 
 | 23 |  * 2005-01-17 fl  fixed crash in getchildren | 
 | 24 |  * 2005-01-18 fl  removed observer api, added iterparse (cElementTree 0.9.3) | 
 | 25 |  * 2005-01-23 fl  revised iterparse api; added namespace event support (0.9.8) | 
 | 26 |  * 2005-01-26 fl  added VERSION module property (cElementTree 1.0) | 
 | 27 |  * 2005-01-28 fl  added remove method (1.0.1) | 
 | 28 |  * 2005-03-01 fl  added iselement function; fixed makeelement aliasing (1.0.2) | 
 | 29 |  * 2005-03-13 fl  export Comment and ProcessingInstruction/PI helpers | 
 | 30 |  * 2005-03-26 fl  added Comment and PI support to XMLParser | 
 | 31 |  * 2005-03-27 fl  event optimizations; complain about bogus events | 
 | 32 |  * 2005-08-08 fl  fixed read error handling in parse | 
 | 33 |  * 2005-08-11 fl  added runtime test for copy workaround (1.0.3) | 
 | 34 |  * 2005-12-13 fl  added expat_capi support (for xml.etree) (1.0.4) | 
 | 35 |  * 2005-12-16 fl  added support for non-standard encodings | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 36 |  * 2006-03-08 fl  fixed a couple of potential null-refs and leaks | 
 | 37 |  * 2006-03-12 fl  merge in 2.5 ssize_t changes | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 38 |  * | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 39 |  * Copyright (c) 1999-2006 by Secret Labs AB.  All rights reserved. | 
 | 40 |  * Copyright (c) 1999-2006 by Fredrik Lundh. | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 41 |  * | 
 | 42 |  * info@pythonware.com | 
 | 43 |  * http://www.pythonware.com | 
 | 44 |  */ | 
 | 45 |  | 
| Fredrik Lundh | 6d52b55 | 2005-12-16 22:06:43 +0000 | [diff] [blame] | 46 | /* Licensed to PSF under a Contributor Agreement. */ | 
 | 47 | /* See http://www.python.org/2.4/license for licensing details. */ | 
 | 48 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 49 | #include "Python.h" | 
 | 50 |  | 
/* module version string */
#define VERSION "1.0.6"

/* -------------------------------------------------------------------- */
/* configuration */

/* Leave defined to include the expat-based XMLParser type */
#define USE_EXPAT

/* Define to do all expat calls via pyexpat's embedded expat library */
/* #define USE_PYEXPAT_CAPI */

/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current version of cElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
 | 75 |  | 
 | 76 | /* -------------------------------------------------------------------- */ | 
 | 77 |  | 
/* memory accounting hooks.  change "#if 0" to "#if 1" to print a
   running byte count on every ALLOC/RELEASE; in the default build
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks */
/* LOCAL(type) declares a file-local helper returning "type"; under
   MSVC it also asks for inlining and the fastcall convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
 | 95 |  | 
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info. */
/* JOIN_GET returns the flag (the lowest bit of the pointer value) */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET clears the lowest bit of p, then ors in the given flag */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ returns the object pointer with the lowest bit cleared */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
 | 103 |  | 
/* glue functions (see the init function for details) */
/* these have static storage, so they are NULL until something assigns
   them (presumably the init function; it is outside this part of the
   file).  deepcopy() checks its helper for NULL before calling it. */
static PyObject* elementtree_copyelement_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_getiterator_obj;
static PyObject* elementpath_obj;
 | 109 |  | 
 | 110 | /* helpers */ | 
 | 111 |  | 
 | 112 | LOCAL(PyObject*) | 
 | 113 | deepcopy(PyObject* object, PyObject* memo) | 
 | 114 | { | 
 | 115 |     /* do a deep copy of the given object */ | 
 | 116 |  | 
 | 117 |     PyObject* args; | 
 | 118 |     PyObject* result; | 
 | 119 |  | 
 | 120 |     if (!elementtree_deepcopy_obj) { | 
 | 121 |         PyErr_SetString( | 
 | 122 |             PyExc_RuntimeError, | 
 | 123 |             "deepcopy helper not found" | 
 | 124 |             ); | 
 | 125 |         return NULL; | 
 | 126 |     } | 
 | 127 |  | 
 | 128 |     args = PyTuple_New(2); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 129 |     if (!args) | 
 | 130 |         return NULL; | 
 | 131 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 132 |     Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object); | 
 | 133 |     Py_INCREF(memo);   PyTuple_SET_ITEM(args, 1, (PyObject*) memo); | 
 | 134 |  | 
 | 135 |     result = PyObject_CallObject(elementtree_deepcopy_obj, args); | 
 | 136 |  | 
 | 137 |     Py_DECREF(args); | 
 | 138 |  | 
 | 139 |     return result; | 
 | 140 | } | 
 | 141 |  | 
 | 142 | LOCAL(PyObject*) | 
 | 143 | list_join(PyObject* list) | 
 | 144 | { | 
 | 145 |     /* join list elements (destroying the list in the process) */ | 
 | 146 |  | 
 | 147 |     PyObject* joiner; | 
 | 148 |     PyObject* function; | 
 | 149 |     PyObject* args; | 
 | 150 |     PyObject* result; | 
 | 151 |  | 
 | 152 |     switch (PyList_GET_SIZE(list)) { | 
 | 153 |     case 0: | 
 | 154 |         Py_DECREF(list); | 
 | 155 |         return PyString_FromString(""); | 
 | 156 |     case 1: | 
 | 157 |         result = PyList_GET_ITEM(list, 0); | 
 | 158 |         Py_INCREF(result); | 
 | 159 |         Py_DECREF(list); | 
 | 160 |         return result; | 
 | 161 |     } | 
 | 162 |  | 
 | 163 |     /* two or more elements: slice out a suitable separator from the | 
 | 164 |        first member, and use that to join the entire list */ | 
 | 165 |  | 
 | 166 |     joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0); | 
 | 167 |     if (!joiner) | 
 | 168 |         return NULL; | 
 | 169 |  | 
 | 170 |     function = PyObject_GetAttrString(joiner, "join"); | 
 | 171 |     if (!function) { | 
 | 172 |         Py_DECREF(joiner); | 
 | 173 |         return NULL; | 
 | 174 |     } | 
 | 175 |  | 
 | 176 |     args = PyTuple_New(1); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 177 |     if (!args) | 
 | 178 |         return NULL; | 
 | 179 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 180 |     PyTuple_SET_ITEM(args, 0, list); | 
 | 181 |  | 
 | 182 |     result = PyObject_CallObject(function, args); | 
 | 183 |  | 
 | 184 |     Py_DECREF(args); /* also removes list */ | 
 | 185 |     Py_DECREF(function); | 
 | 186 |     Py_DECREF(joiner); | 
 | 187 |  | 
 | 188 |     return result; | 
 | 189 | } | 
 | 190 |  | 
 | 191 | #if (PY_VERSION_HEX < 0x02020000) | 
 | 192 | LOCAL(int) | 
 | 193 | PyDict_Update(PyObject* dict, PyObject* other) | 
 | 194 | { | 
 | 195 |     /* PyDict_Update emulation for 2.1 and earlier */ | 
 | 196 |  | 
 | 197 |     PyObject* res; | 
 | 198 |      | 
 | 199 |     res = PyObject_CallMethod(dict, "update", "O", other); | 
 | 200 |     if (!res) | 
 | 201 |         return -1; | 
 | 202 |  | 
 | 203 |     Py_DECREF(res); | 
 | 204 |     return 0; | 
 | 205 | } | 
 | 206 | #endif | 
 | 207 |  | 
 | 208 | /* -------------------------------------------------------------------- */ | 
 | 209 | /* the element type */ | 
 | 210 |  | 
/* optional part of an element: attributes and children.  allocated
   separately (see element_new_extra) and reached through the "extra"
   pointer in ElementObject, which is NULL for elements that have
   neither. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* built-in storage for up to STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];
    
} ElementObjectExtra;
 | 226 |  | 
/* the element object proper.  tag, text and tail each hold one
   reference; text and tail may carry a flag in the lowest pointer
   bit (see the JOIN_ macros). */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL if the element has neither */
    ElementObjectExtra* extra;

} ElementObject;

/* forward declaration; the type object is defined later in the file */
static PyTypeObject Element_Type;

/* true if op is exactly an Element (compares the type pointer only) */
#define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
 | 252 |  | 
 | 253 | /* -------------------------------------------------------------------- */ | 
 | 254 | /* element constructor and destructor */ | 
 | 255 |  | 
 | 256 | LOCAL(int) | 
 | 257 | element_new_extra(ElementObject* self, PyObject* attrib) | 
 | 258 | { | 
 | 259 |     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); | 
 | 260 |     if (!self->extra) | 
 | 261 |         return -1; | 
 | 262 |  | 
 | 263 |     if (!attrib) | 
 | 264 |         attrib = Py_None; | 
 | 265 |  | 
 | 266 |     Py_INCREF(attrib); | 
 | 267 |     self->extra->attrib = attrib; | 
 | 268 |  | 
 | 269 |     self->extra->length = 0; | 
 | 270 |     self->extra->allocated = STATIC_CHILDREN; | 
 | 271 |     self->extra->children = self->extra->_children; | 
 | 272 |  | 
 | 273 |     return 0; | 
 | 274 | } | 
 | 275 |  | 
 | 276 | LOCAL(void) | 
 | 277 | element_dealloc_extra(ElementObject* self) | 
 | 278 | { | 
 | 279 |     int i; | 
 | 280 |  | 
 | 281 |     Py_DECREF(self->extra->attrib); | 
 | 282 |  | 
 | 283 |     for (i = 0; i < self->extra->length; i++) | 
 | 284 |         Py_DECREF(self->extra->children[i]); | 
 | 285 |  | 
 | 286 |     if (self->extra->children != self->extra->_children) | 
 | 287 |         PyObject_Free(self->extra->children); | 
 | 288 |  | 
 | 289 |     PyObject_Free(self->extra); | 
 | 290 | } | 
 | 291 |  | 
 | 292 | LOCAL(PyObject*) | 
 | 293 | element_new(PyObject* tag, PyObject* attrib) | 
 | 294 | { | 
 | 295 |     ElementObject* self; | 
 | 296 |  | 
 | 297 |     self = PyObject_New(ElementObject, &Element_Type); | 
 | 298 |     if (self == NULL) | 
 | 299 |         return NULL; | 
 | 300 |  | 
 | 301 |     /* use None for empty dictionaries */ | 
 | 302 |     if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib)) | 
 | 303 |         attrib = Py_None; | 
 | 304 |  | 
 | 305 |     self->extra = NULL; | 
 | 306 |  | 
 | 307 |     if (attrib != Py_None) { | 
 | 308 |  | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 309 |         if (element_new_extra(self, attrib) < 0) { | 
 | 310 |             PyObject_Del(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 311 |             return NULL; | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 312 | 	} | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 313 |  | 
 | 314 |         self->extra->length = 0; | 
 | 315 |         self->extra->allocated = STATIC_CHILDREN; | 
 | 316 |         self->extra->children = self->extra->_children; | 
 | 317 |  | 
 | 318 |     } | 
 | 319 |  | 
 | 320 |     Py_INCREF(tag); | 
 | 321 |     self->tag = tag; | 
 | 322 |  | 
 | 323 |     Py_INCREF(Py_None); | 
 | 324 |     self->text = Py_None; | 
 | 325 |  | 
 | 326 |     Py_INCREF(Py_None); | 
 | 327 |     self->tail = Py_None; | 
 | 328 |  | 
 | 329 |     ALLOC(sizeof(ElementObject), "create element"); | 
 | 330 |  | 
 | 331 |     return (PyObject*) self; | 
 | 332 | } | 
 | 333 |  | 
 | 334 | LOCAL(int) | 
 | 335 | element_resize(ElementObject* self, int extra) | 
 | 336 | { | 
 | 337 |     int size; | 
 | 338 |     PyObject* *children; | 
 | 339 |  | 
 | 340 |     /* make sure self->children can hold the given number of extra | 
 | 341 |        elements.  set an exception and return -1 if allocation failed */ | 
 | 342 |  | 
 | 343 |     if (!self->extra) | 
 | 344 |         element_new_extra(self, NULL); | 
 | 345 |  | 
 | 346 |     size = self->extra->length + extra; | 
 | 347 |  | 
 | 348 |     if (size > self->extra->allocated) { | 
 | 349 |         /* use Python 2.4's list growth strategy */ | 
 | 350 |         size = (size >> 3) + (size < 9 ? 3 : 6) + size; | 
 | 351 |         if (self->extra->children != self->extra->_children) { | 
 | 352 |             children = PyObject_Realloc(self->extra->children, | 
 | 353 |                                         size * sizeof(PyObject*)); | 
 | 354 |             if (!children) | 
 | 355 |                 goto nomemory; | 
 | 356 |         } else { | 
 | 357 |             children = PyObject_Malloc(size * sizeof(PyObject*)); | 
 | 358 |             if (!children) | 
 | 359 |                 goto nomemory; | 
 | 360 |             /* copy existing children from static area to malloc buffer */ | 
 | 361 |             memcpy(children, self->extra->children, | 
 | 362 |                    self->extra->length * sizeof(PyObject*)); | 
 | 363 |         } | 
 | 364 |         self->extra->children = children; | 
 | 365 |         self->extra->allocated = size; | 
 | 366 |     } | 
 | 367 |  | 
 | 368 |     return 0; | 
 | 369 |  | 
 | 370 |   nomemory: | 
 | 371 |     PyErr_NoMemory(); | 
 | 372 |     return -1; | 
 | 373 | } | 
 | 374 |  | 
 | 375 | LOCAL(int) | 
 | 376 | element_add_subelement(ElementObject* self, PyObject* element) | 
 | 377 | { | 
 | 378 |     /* add a child element to a parent */ | 
 | 379 |  | 
 | 380 |     if (element_resize(self, 1) < 0) | 
 | 381 |         return -1; | 
 | 382 |  | 
 | 383 |     Py_INCREF(element); | 
 | 384 |     self->extra->children[self->extra->length] = element; | 
 | 385 |  | 
 | 386 |     self->extra->length++; | 
 | 387 |  | 
 | 388 |     return 0; | 
 | 389 | } | 
 | 390 |  | 
 | 391 | LOCAL(PyObject*) | 
 | 392 | element_get_attrib(ElementObject* self) | 
 | 393 | { | 
 | 394 |     /* return borrowed reference to attrib dictionary */ | 
 | 395 |     /* note: this function assumes that the extra section exists */ | 
 | 396 |  | 
 | 397 |     PyObject* res = self->extra->attrib; | 
 | 398 |  | 
 | 399 |     if (res == Py_None) { | 
 | 400 |         /* create missing dictionary */ | 
 | 401 |         res = PyDict_New(); | 
 | 402 |         if (!res) | 
 | 403 |             return NULL; | 
 | 404 |         self->extra->attrib = res; | 
 | 405 |     } | 
 | 406 |  | 
 | 407 |     return res; | 
 | 408 | } | 
 | 409 |  | 
 | 410 | LOCAL(PyObject*) | 
 | 411 | element_get_text(ElementObject* self) | 
 | 412 | { | 
 | 413 |     /* return borrowed reference to text attribute */ | 
 | 414 |  | 
 | 415 |     PyObject* res = self->text; | 
 | 416 |  | 
 | 417 |     if (JOIN_GET(res)) { | 
 | 418 |         res = JOIN_OBJ(res); | 
 | 419 |         if (PyList_CheckExact(res)) { | 
 | 420 |             res = list_join(res); | 
 | 421 |             if (!res) | 
 | 422 |                 return NULL; | 
 | 423 |             self->text = res; | 
 | 424 |         } | 
 | 425 |     } | 
 | 426 |  | 
 | 427 |     return res; | 
 | 428 | } | 
 | 429 |  | 
 | 430 | LOCAL(PyObject*) | 
 | 431 | element_get_tail(ElementObject* self) | 
 | 432 | { | 
 | 433 |     /* return borrowed reference to text attribute */ | 
 | 434 |  | 
 | 435 |     PyObject* res = self->tail; | 
 | 436 |  | 
 | 437 |     if (JOIN_GET(res)) { | 
 | 438 |         res = JOIN_OBJ(res); | 
 | 439 |         if (PyList_CheckExact(res)) { | 
 | 440 |             res = list_join(res); | 
 | 441 |             if (!res) | 
 | 442 |                 return NULL; | 
 | 443 |             self->tail = res; | 
 | 444 |         } | 
 | 445 |     } | 
 | 446 |  | 
 | 447 |     return res; | 
 | 448 | } | 
 | 449 |  | 
 | 450 | static PyObject* | 
 | 451 | element(PyObject* self, PyObject* args, PyObject* kw) | 
 | 452 | { | 
 | 453 |     PyObject* elem; | 
 | 454 |  | 
 | 455 |     PyObject* tag; | 
 | 456 |     PyObject* attrib = NULL; | 
 | 457 |     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, | 
 | 458 |                           &PyDict_Type, &attrib)) | 
 | 459 |         return NULL; | 
 | 460 |  | 
 | 461 |     if (attrib || kw) { | 
 | 462 |         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); | 
 | 463 |         if (!attrib) | 
 | 464 |             return NULL; | 
 | 465 |         if (kw) | 
 | 466 |             PyDict_Update(attrib, kw); | 
 | 467 |     } else { | 
 | 468 |         Py_INCREF(Py_None); | 
 | 469 |         attrib = Py_None; | 
 | 470 |     } | 
 | 471 |  | 
 | 472 |     elem = element_new(tag, attrib); | 
 | 473 |  | 
 | 474 |     Py_DECREF(attrib); | 
 | 475 |  | 
 | 476 |     return elem; | 
 | 477 | } | 
 | 478 |  | 
 | 479 | static PyObject* | 
 | 480 | subelement(PyObject* self, PyObject* args, PyObject* kw) | 
 | 481 | { | 
 | 482 |     PyObject* elem; | 
 | 483 |  | 
 | 484 |     ElementObject* parent; | 
 | 485 |     PyObject* tag; | 
 | 486 |     PyObject* attrib = NULL; | 
 | 487 |     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", | 
 | 488 |                           &Element_Type, &parent, &tag, | 
 | 489 |                           &PyDict_Type, &attrib)) | 
 | 490 |         return NULL; | 
 | 491 |  | 
 | 492 |     if (attrib || kw) { | 
 | 493 |         attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); | 
 | 494 |         if (!attrib) | 
 | 495 |             return NULL; | 
 | 496 |         if (kw) | 
 | 497 |             PyDict_Update(attrib, kw); | 
 | 498 |     } else { | 
 | 499 |         Py_INCREF(Py_None); | 
 | 500 |         attrib = Py_None; | 
 | 501 |     } | 
 | 502 |  | 
 | 503 |     elem = element_new(tag, attrib); | 
 | 504 |  | 
 | 505 |     Py_DECREF(attrib); | 
 | 506 |  | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 507 |     if (element_add_subelement(parent, elem) < 0) { | 
 | 508 |         Py_DECREF(elem); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 509 |         return NULL; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 510 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 511 |  | 
 | 512 |     return elem; | 
 | 513 | } | 
 | 514 |  | 
 | 515 | static void | 
 | 516 | element_dealloc(ElementObject* self) | 
 | 517 | { | 
 | 518 |     if (self->extra) | 
 | 519 |         element_dealloc_extra(self); | 
 | 520 |  | 
 | 521 |     /* discard attributes */ | 
 | 522 |     Py_DECREF(self->tag); | 
 | 523 |     Py_DECREF(JOIN_OBJ(self->text)); | 
 | 524 |     Py_DECREF(JOIN_OBJ(self->tail)); | 
 | 525 |  | 
 | 526 |     RELEASE(sizeof(ElementObject), "destroy element"); | 
 | 527 |  | 
 | 528 |     PyObject_Del(self); | 
 | 529 | } | 
 | 530 |  | 
 | 531 | /* -------------------------------------------------------------------- */ | 
 | 532 | /* methods (in alphabetical order) */ | 
 | 533 |  | 
 | 534 | static PyObject* | 
 | 535 | element_append(ElementObject* self, PyObject* args) | 
 | 536 | { | 
 | 537 |     PyObject* element; | 
 | 538 |     if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element)) | 
 | 539 |         return NULL; | 
 | 540 |  | 
 | 541 |     if (element_add_subelement(self, element) < 0) | 
 | 542 |         return NULL; | 
 | 543 |  | 
 | 544 |     Py_RETURN_NONE; | 
 | 545 | } | 
 | 546 |  | 
 | 547 | static PyObject* | 
 | 548 | element_clear(ElementObject* self, PyObject* args) | 
 | 549 | { | 
 | 550 |     if (!PyArg_ParseTuple(args, ":clear")) | 
 | 551 |         return NULL; | 
 | 552 |  | 
 | 553 |     if (self->extra) { | 
 | 554 |         element_dealloc_extra(self); | 
 | 555 |         self->extra = NULL; | 
 | 556 |     } | 
 | 557 |  | 
 | 558 |     Py_INCREF(Py_None); | 
 | 559 |     Py_DECREF(JOIN_OBJ(self->text)); | 
 | 560 |     self->text = Py_None; | 
 | 561 |  | 
 | 562 |     Py_INCREF(Py_None); | 
 | 563 |     Py_DECREF(JOIN_OBJ(self->tail)); | 
 | 564 |     self->tail = Py_None; | 
 | 565 |  | 
 | 566 |     Py_RETURN_NONE; | 
 | 567 | } | 
 | 568 |  | 
 | 569 | static PyObject* | 
 | 570 | element_copy(ElementObject* self, PyObject* args) | 
 | 571 | { | 
 | 572 |     int i; | 
 | 573 |     ElementObject* element; | 
 | 574 |  | 
 | 575 |     if (!PyArg_ParseTuple(args, ":__copy__")) | 
 | 576 |         return NULL; | 
 | 577 |  | 
 | 578 |     element = (ElementObject*) element_new( | 
 | 579 |         self->tag, (self->extra) ? self->extra->attrib : Py_None | 
 | 580 |         ); | 
 | 581 |     if (!element) | 
 | 582 |         return NULL; | 
 | 583 |  | 
 | 584 |     Py_DECREF(JOIN_OBJ(element->text)); | 
 | 585 |     element->text = self->text; | 
 | 586 |     Py_INCREF(JOIN_OBJ(element->text)); | 
 | 587 |  | 
 | 588 |     Py_DECREF(JOIN_OBJ(element->tail)); | 
 | 589 |     element->tail = self->tail; | 
 | 590 |     Py_INCREF(JOIN_OBJ(element->tail)); | 
 | 591 |  | 
 | 592 |     if (self->extra) { | 
 | 593 |          | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 594 |         if (element_resize(element, self->extra->length) < 0) { | 
 | 595 |             Py_DECREF(element); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 596 |             return NULL; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 597 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 598 |  | 
 | 599 |         for (i = 0; i < self->extra->length; i++) { | 
 | 600 |             Py_INCREF(self->extra->children[i]); | 
 | 601 |             element->extra->children[i] = self->extra->children[i]; | 
 | 602 |         } | 
 | 603 |  | 
 | 604 |         element->extra->length = self->extra->length; | 
 | 605 |          | 
 | 606 |     } | 
 | 607 |  | 
 | 608 |     return (PyObject*) element; | 
 | 609 | } | 
 | 610 |  | 
 | 611 | static PyObject* | 
 | 612 | element_deepcopy(ElementObject* self, PyObject* args) | 
 | 613 | { | 
 | 614 |     int i; | 
 | 615 |     ElementObject* element; | 
 | 616 |     PyObject* tag; | 
 | 617 |     PyObject* attrib; | 
 | 618 |     PyObject* text; | 
 | 619 |     PyObject* tail; | 
 | 620 |     PyObject* id; | 
 | 621 |  | 
 | 622 |     PyObject* memo; | 
 | 623 |     if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) | 
 | 624 |         return NULL; | 
 | 625 |  | 
 | 626 |     tag = deepcopy(self->tag, memo); | 
 | 627 |     if (!tag) | 
 | 628 |         return NULL; | 
 | 629 |  | 
 | 630 |     if (self->extra) { | 
 | 631 |         attrib = deepcopy(self->extra->attrib, memo); | 
 | 632 |         if (!attrib) { | 
 | 633 |             Py_DECREF(tag); | 
 | 634 |             return NULL; | 
 | 635 |         } | 
 | 636 |     } else { | 
 | 637 |         Py_INCREF(Py_None); | 
 | 638 |         attrib = Py_None; | 
 | 639 |     } | 
 | 640 |  | 
 | 641 |     element = (ElementObject*) element_new(tag, attrib); | 
 | 642 |  | 
 | 643 |     Py_DECREF(tag); | 
 | 644 |     Py_DECREF(attrib); | 
 | 645 |  | 
 | 646 |     if (!element) | 
 | 647 |         return NULL; | 
 | 648 |      | 
 | 649 |     text = deepcopy(JOIN_OBJ(self->text), memo); | 
 | 650 |     if (!text) | 
 | 651 |         goto error; | 
 | 652 |     Py_DECREF(element->text); | 
 | 653 |     element->text = JOIN_SET(text, JOIN_GET(self->text)); | 
 | 654 |  | 
 | 655 |     tail = deepcopy(JOIN_OBJ(self->tail), memo); | 
 | 656 |     if (!tail) | 
 | 657 |         goto error; | 
 | 658 |     Py_DECREF(element->tail); | 
 | 659 |     element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); | 
 | 660 |  | 
 | 661 |     if (self->extra) { | 
 | 662 |          | 
 | 663 |         if (element_resize(element, self->extra->length) < 0) | 
 | 664 |             goto error; | 
 | 665 |  | 
 | 666 |         for (i = 0; i < self->extra->length; i++) { | 
 | 667 |             PyObject* child = deepcopy(self->extra->children[i], memo); | 
 | 668 |             if (!child) { | 
 | 669 |                 element->extra->length = i; | 
 | 670 |                 goto error; | 
 | 671 |             } | 
 | 672 |             element->extra->children[i] = child; | 
 | 673 |         } | 
 | 674 |  | 
 | 675 |         element->extra->length = self->extra->length; | 
 | 676 |          | 
 | 677 |     } | 
 | 678 |  | 
 | 679 |     /* add object to memo dictionary (so deepcopy won't visit it again) */ | 
 | 680 |     id = PyInt_FromLong((Py_uintptr_t) self); | 
 | 681 |  | 
 | 682 |     i = PyDict_SetItem(memo, id, (PyObject*) element); | 
 | 683 |  | 
 | 684 |     Py_DECREF(id); | 
 | 685 |  | 
 | 686 |     if (i < 0) | 
 | 687 |         goto error; | 
 | 688 |  | 
 | 689 |     return (PyObject*) element; | 
 | 690 |  | 
 | 691 |   error: | 
 | 692 |     Py_DECREF(element); | 
 | 693 |     return NULL; | 
 | 694 | } | 
 | 695 |  | 
 | 696 | LOCAL(int) | 
 | 697 | checkpath(PyObject* tag) | 
 | 698 | { | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 699 |     Py_ssize_t i; | 
 | 700 |     int check = 1; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 701 |  | 
 | 702 |     /* check if a tag contains an xpath character */ | 
 | 703 |  | 
 | 704 | #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@') | 
 | 705 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 706 |     if (PyUnicode_Check(tag)) { | 
 | 707 |         Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); | 
 | 708 |         for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { | 
 | 709 |             if (p[i] == '{') | 
 | 710 |                 check = 0; | 
 | 711 |             else if (p[i] == '}') | 
 | 712 |                 check = 1; | 
 | 713 |             else if (check && PATHCHAR(p[i])) | 
 | 714 |                 return 1; | 
 | 715 |         } | 
 | 716 |         return 0; | 
 | 717 |     } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 718 |     if (PyString_Check(tag)) { | 
 | 719 |         char *p = PyString_AS_STRING(tag); | 
 | 720 |         for (i = 0; i < PyString_GET_SIZE(tag); i++) { | 
 | 721 |             if (p[i] == '{') | 
 | 722 |                 check = 0; | 
 | 723 |             else if (p[i] == '}') | 
 | 724 |                 check = 1; | 
 | 725 |             else if (check && PATHCHAR(p[i])) | 
 | 726 |                 return 1; | 
 | 727 |         } | 
 | 728 |         return 0; | 
 | 729 |     } | 
 | 730 |  | 
 | 731 |     return 1; /* unknown type; might be path expression */ | 
 | 732 | } | 
 | 733 |  | 
 | 734 | static PyObject* | 
 | 735 | element_find(ElementObject* self, PyObject* args) | 
 | 736 | { | 
 | 737 |     int i; | 
 | 738 |  | 
 | 739 |     PyObject* tag; | 
 | 740 |     if (!PyArg_ParseTuple(args, "O:find", &tag)) | 
 | 741 |         return NULL; | 
 | 742 |  | 
 | 743 |     if (checkpath(tag)) | 
 | 744 |         return PyObject_CallMethod( | 
 | 745 |             elementpath_obj, "find", "OO", self, tag | 
 | 746 |             ); | 
 | 747 |  | 
 | 748 |     if (!self->extra) | 
 | 749 |         Py_RETURN_NONE; | 
 | 750 |          | 
 | 751 |     for (i = 0; i < self->extra->length; i++) { | 
 | 752 |         PyObject* item = self->extra->children[i]; | 
 | 753 |         if (Element_CheckExact(item) && | 
 | 754 |             PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { | 
 | 755 |             Py_INCREF(item); | 
 | 756 |             return item; | 
 | 757 |         } | 
 | 758 |     } | 
 | 759 |  | 
 | 760 |     Py_RETURN_NONE; | 
 | 761 | } | 
 | 762 |  | 
 | 763 | static PyObject* | 
 | 764 | element_findtext(ElementObject* self, PyObject* args) | 
 | 765 | { | 
 | 766 |     int i; | 
 | 767 |  | 
 | 768 |     PyObject* tag; | 
 | 769 |     PyObject* default_value = Py_None; | 
 | 770 |     if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value)) | 
 | 771 |         return NULL; | 
 | 772 |  | 
 | 773 |     if (checkpath(tag)) | 
 | 774 |         return PyObject_CallMethod( | 
 | 775 |             elementpath_obj, "findtext", "OOO", self, tag, default_value | 
 | 776 |             ); | 
 | 777 |  | 
 | 778 |     if (!self->extra) { | 
 | 779 |         Py_INCREF(default_value); | 
 | 780 |         return default_value; | 
 | 781 |     } | 
 | 782 |  | 
 | 783 |     for (i = 0; i < self->extra->length; i++) { | 
 | 784 |         ElementObject* item = (ElementObject*) self->extra->children[i]; | 
 | 785 |         if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) { | 
 | 786 |             PyObject* text = element_get_text(item); | 
 | 787 |             if (text == Py_None) | 
 | 788 |                 return PyString_FromString(""); | 
| Thomas Wouters | 00ee7ba | 2006-08-21 19:07:27 +0000 | [diff] [blame] | 789 |             Py_XINCREF(text); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 790 |             return text; | 
 | 791 |         } | 
 | 792 |     } | 
 | 793 |  | 
 | 794 |     Py_INCREF(default_value); | 
 | 795 |     return default_value; | 
 | 796 | } | 
 | 797 |  | 
 | 798 | static PyObject* | 
 | 799 | element_findall(ElementObject* self, PyObject* args) | 
 | 800 | { | 
 | 801 |     int i; | 
 | 802 |     PyObject* out; | 
 | 803 |  | 
 | 804 |     PyObject* tag; | 
 | 805 |     if (!PyArg_ParseTuple(args, "O:findall", &tag)) | 
 | 806 |         return NULL; | 
 | 807 |  | 
 | 808 |     if (checkpath(tag)) | 
 | 809 |         return PyObject_CallMethod( | 
 | 810 |             elementpath_obj, "findall", "OO", self, tag | 
 | 811 |             ); | 
 | 812 |  | 
 | 813 |     out = PyList_New(0); | 
 | 814 |     if (!out) | 
 | 815 |         return NULL; | 
 | 816 |  | 
 | 817 |     if (!self->extra) | 
 | 818 |         return out; | 
 | 819 |  | 
 | 820 |     for (i = 0; i < self->extra->length; i++) { | 
 | 821 |         PyObject* item = self->extra->children[i]; | 
 | 822 |         if (Element_CheckExact(item) && | 
 | 823 |             PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) { | 
 | 824 |             if (PyList_Append(out, item) < 0) { | 
 | 825 |                 Py_DECREF(out); | 
 | 826 |                 return NULL; | 
 | 827 |             } | 
 | 828 |         } | 
 | 829 |     } | 
 | 830 |  | 
 | 831 |     return out; | 
 | 832 | } | 
 | 833 |  | 
 | 834 | static PyObject* | 
 | 835 | element_get(ElementObject* self, PyObject* args) | 
 | 836 | { | 
 | 837 |     PyObject* value; | 
 | 838 |  | 
 | 839 |     PyObject* key; | 
 | 840 |     PyObject* default_value = Py_None; | 
 | 841 |     if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value)) | 
 | 842 |         return NULL; | 
 | 843 |  | 
 | 844 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 845 |         value = default_value; | 
 | 846 |     else { | 
 | 847 |         value = PyDict_GetItem(self->extra->attrib, key); | 
 | 848 |         if (!value) | 
 | 849 |             value = default_value; | 
 | 850 |     } | 
 | 851 |  | 
 | 852 |     Py_INCREF(value); | 
 | 853 |     return value; | 
 | 854 | } | 
 | 855 |  | 
 | 856 | static PyObject* | 
 | 857 | element_getchildren(ElementObject* self, PyObject* args) | 
 | 858 | { | 
 | 859 |     int i; | 
 | 860 |     PyObject* list; | 
 | 861 |  | 
 | 862 |     if (!PyArg_ParseTuple(args, ":getchildren")) | 
 | 863 |         return NULL; | 
 | 864 |  | 
 | 865 |     if (!self->extra) | 
 | 866 |         return PyList_New(0); | 
 | 867 |  | 
 | 868 |     list = PyList_New(self->extra->length); | 
 | 869 |     if (!list) | 
 | 870 |         return NULL; | 
 | 871 |  | 
 | 872 |     for (i = 0; i < self->extra->length; i++) { | 
 | 873 |         PyObject* item = self->extra->children[i]; | 
 | 874 |         Py_INCREF(item); | 
 | 875 |         PyList_SET_ITEM(list, i, item); | 
 | 876 |     } | 
 | 877 |  | 
 | 878 |     return list; | 
 | 879 | } | 
 | 880 |  | 
 | 881 | static PyObject* | 
 | 882 | element_getiterator(ElementObject* self, PyObject* args) | 
 | 883 | { | 
 | 884 |     PyObject* result; | 
 | 885 |      | 
 | 886 |     PyObject* tag = Py_None; | 
 | 887 |     if (!PyArg_ParseTuple(args, "|O:getiterator", &tag)) | 
 | 888 |         return NULL; | 
 | 889 |  | 
 | 890 |     if (!elementtree_getiterator_obj) { | 
 | 891 |         PyErr_SetString( | 
 | 892 |             PyExc_RuntimeError, | 
 | 893 |             "getiterator helper not found" | 
 | 894 |             ); | 
 | 895 |         return NULL; | 
 | 896 |     } | 
 | 897 |  | 
 | 898 |     args = PyTuple_New(2); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 899 |     if (!args) | 
 | 900 |         return NULL; | 
| Neal Norwitz | 02876df | 2006-02-07 06:58:52 +0000 | [diff] [blame] | 901 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 902 |     Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); | 
 | 903 |     Py_INCREF(tag);  PyTuple_SET_ITEM(args, 1, (PyObject*) tag); | 
 | 904 |  | 
 | 905 |     result = PyObject_CallObject(elementtree_getiterator_obj, args); | 
 | 906 |  | 
 | 907 |     Py_DECREF(args); | 
 | 908 |  | 
 | 909 |     return result; | 
 | 910 | } | 
 | 911 |  | 
 | 912 | static PyObject* | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 913 | element_getitem(PyObject* self_, Py_ssize_t index) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 914 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 915 |     ElementObject* self = (ElementObject*) self_; | 
 | 916 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 917 |     if (!self->extra || index < 0 || index >= self->extra->length) { | 
 | 918 |         PyErr_SetString( | 
 | 919 |             PyExc_IndexError, | 
 | 920 |             "child index out of range" | 
 | 921 |             ); | 
 | 922 |         return NULL; | 
 | 923 |     } | 
 | 924 |  | 
 | 925 |     Py_INCREF(self->extra->children[index]); | 
 | 926 |     return self->extra->children[index]; | 
 | 927 | } | 
 | 928 |  | 
 | 929 | static PyObject* | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 930 | element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 931 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 932 |     ElementObject* self = (ElementObject*) self_; | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 933 |     Py_ssize_t i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 934 |     PyObject* list; | 
 | 935 |  | 
 | 936 |     if (!self->extra) | 
 | 937 |         return PyList_New(0); | 
 | 938 |  | 
 | 939 |     /* standard clamping */ | 
 | 940 |     if (start < 0) | 
 | 941 |         start = 0; | 
 | 942 |     if (end < 0) | 
 | 943 |         end = 0; | 
 | 944 |     if (end > self->extra->length) | 
 | 945 |         end = self->extra->length; | 
 | 946 |     if (start > end) | 
 | 947 |         start = end; | 
 | 948 |  | 
 | 949 |     list = PyList_New(end - start); | 
 | 950 |     if (!list) | 
 | 951 |         return NULL; | 
 | 952 |  | 
 | 953 |     for (i = start; i < end; i++) { | 
 | 954 |         PyObject* item = self->extra->children[i]; | 
 | 955 |         Py_INCREF(item); | 
 | 956 |         PyList_SET_ITEM(list, i - start, item); | 
 | 957 |     } | 
 | 958 |  | 
 | 959 |     return list; | 
 | 960 | } | 
 | 961 |  | 
 | 962 | static PyObject* | 
 | 963 | element_insert(ElementObject* self, PyObject* args) | 
 | 964 | { | 
 | 965 |     int i; | 
 | 966 |  | 
 | 967 |     int index; | 
 | 968 |     PyObject* element; | 
 | 969 |     if (!PyArg_ParseTuple(args, "iO!:insert", &index, | 
 | 970 |                           &Element_Type, &element)) | 
 | 971 |         return NULL; | 
 | 972 |  | 
 | 973 |     if (!self->extra) | 
 | 974 |         element_new_extra(self, NULL); | 
 | 975 |  | 
 | 976 |     if (index < 0) | 
 | 977 |         index = 0; | 
 | 978 |     if (index > self->extra->length) | 
 | 979 |         index = self->extra->length; | 
 | 980 |  | 
 | 981 |     if (element_resize(self, 1) < 0) | 
 | 982 |         return NULL; | 
 | 983 |  | 
 | 984 |     for (i = self->extra->length; i > index; i--) | 
 | 985 |         self->extra->children[i] = self->extra->children[i-1]; | 
 | 986 |  | 
 | 987 |     Py_INCREF(element); | 
 | 988 |     self->extra->children[index] = element; | 
 | 989 |  | 
 | 990 |     self->extra->length++; | 
 | 991 |  | 
 | 992 |     Py_RETURN_NONE; | 
 | 993 | } | 
 | 994 |  | 
 | 995 | static PyObject* | 
 | 996 | element_items(ElementObject* self, PyObject* args) | 
 | 997 | { | 
 | 998 |     if (!PyArg_ParseTuple(args, ":items")) | 
 | 999 |         return NULL; | 
 | 1000 |  | 
 | 1001 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 1002 |         return PyList_New(0); | 
 | 1003 |  | 
 | 1004 |     return PyDict_Items(self->extra->attrib); | 
 | 1005 | } | 
 | 1006 |  | 
 | 1007 | static PyObject* | 
 | 1008 | element_keys(ElementObject* self, PyObject* args) | 
 | 1009 | { | 
 | 1010 |     if (!PyArg_ParseTuple(args, ":keys")) | 
 | 1011 |         return NULL; | 
 | 1012 |  | 
 | 1013 |     if (!self->extra || self->extra->attrib == Py_None) | 
 | 1014 |         return PyList_New(0); | 
 | 1015 |  | 
 | 1016 |     return PyDict_Keys(self->extra->attrib); | 
 | 1017 | } | 
 | 1018 |  | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 1019 | static Py_ssize_t | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1020 | element_length(ElementObject* self) | 
 | 1021 | { | 
 | 1022 |     if (!self->extra) | 
 | 1023 |         return 0; | 
 | 1024 |  | 
 | 1025 |     return self->extra->length; | 
 | 1026 | } | 
 | 1027 |  | 
 | 1028 | static PyObject* | 
 | 1029 | element_makeelement(PyObject* self, PyObject* args, PyObject* kw) | 
 | 1030 | { | 
 | 1031 |     PyObject* elem; | 
 | 1032 |  | 
 | 1033 |     PyObject* tag; | 
 | 1034 |     PyObject* attrib; | 
 | 1035 |     if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib)) | 
 | 1036 |         return NULL; | 
 | 1037 |  | 
 | 1038 |     attrib = PyDict_Copy(attrib); | 
 | 1039 |     if (!attrib) | 
 | 1040 |         return NULL; | 
 | 1041 |  | 
 | 1042 |     elem = element_new(tag, attrib); | 
 | 1043 |  | 
 | 1044 |     Py_DECREF(attrib); | 
 | 1045 |  | 
 | 1046 |     return elem; | 
 | 1047 | } | 
 | 1048 |  | 
 | 1049 | static PyObject* | 
 | 1050 | element_reduce(ElementObject* self, PyObject* args) | 
 | 1051 | { | 
 | 1052 |     if (!PyArg_ParseTuple(args, ":__reduce__")) | 
 | 1053 |         return NULL; | 
 | 1054 |  | 
 | 1055 |     /* Hack alert: This method is used to work around a __copy__ | 
 | 1056 |        problem on certain 2.3 and 2.4 versions.  To save time and | 
 | 1057 |        simplify the code, we create the copy in here, and use a dummy | 
 | 1058 |        copyelement helper to trick the copy module into doing the | 
 | 1059 |        right thing. */ | 
 | 1060 |  | 
 | 1061 |     if (!elementtree_copyelement_obj) { | 
 | 1062 |         PyErr_SetString( | 
 | 1063 |             PyExc_RuntimeError, | 
 | 1064 |             "copyelement helper not found" | 
 | 1065 |             ); | 
 | 1066 |         return NULL; | 
 | 1067 |     } | 
 | 1068 |  | 
 | 1069 |     return Py_BuildValue( | 
 | 1070 |         "O(N)", elementtree_copyelement_obj, element_copy(self, args) | 
 | 1071 |         ); | 
 | 1072 | } | 
 | 1073 |  | 
 | 1074 | static PyObject* | 
 | 1075 | element_remove(ElementObject* self, PyObject* args) | 
 | 1076 | { | 
 | 1077 |     int i; | 
 | 1078 |  | 
 | 1079 |     PyObject* element; | 
 | 1080 |     if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element)) | 
 | 1081 |         return NULL; | 
 | 1082 |  | 
 | 1083 |     if (!self->extra) { | 
 | 1084 |         /* element has no children, so raise exception */ | 
 | 1085 |         PyErr_SetString( | 
 | 1086 |             PyExc_ValueError, | 
 | 1087 |             "list.remove(x): x not in list" | 
 | 1088 |             ); | 
 | 1089 |         return NULL; | 
 | 1090 |     } | 
 | 1091 |  | 
 | 1092 |     for (i = 0; i < self->extra->length; i++) { | 
 | 1093 |         if (self->extra->children[i] == element) | 
 | 1094 |             break; | 
 | 1095 |         if (PyObject_Compare(self->extra->children[i], element) == 0) | 
 | 1096 |             break; | 
 | 1097 |     } | 
 | 1098 |  | 
 | 1099 |     if (i == self->extra->length) { | 
 | 1100 |         /* element is not in children, so raise exception */ | 
 | 1101 |         PyErr_SetString( | 
 | 1102 |             PyExc_ValueError, | 
 | 1103 |             "list.remove(x): x not in list" | 
 | 1104 |             ); | 
 | 1105 |         return NULL; | 
 | 1106 |     } | 
 | 1107 |  | 
 | 1108 |     Py_DECREF(self->extra->children[i]); | 
 | 1109 |  | 
 | 1110 |     self->extra->length--; | 
 | 1111 |  | 
 | 1112 |     for (; i < self->extra->length; i++) | 
 | 1113 |         self->extra->children[i] = self->extra->children[i+1]; | 
 | 1114 |  | 
 | 1115 |     Py_RETURN_NONE; | 
 | 1116 | } | 
 | 1117 |  | 
 | 1118 | static PyObject* | 
 | 1119 | element_repr(ElementObject* self) | 
 | 1120 | { | 
 | 1121 |     PyObject* repr; | 
 | 1122 |     char buffer[100]; | 
 | 1123 |      | 
 | 1124 |     repr = PyString_FromString("<Element "); | 
 | 1125 |  | 
 | 1126 |     PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag)); | 
 | 1127 |  | 
 | 1128 |     sprintf(buffer, " at %p>", self); | 
 | 1129 |     PyString_ConcatAndDel(&repr, PyString_FromString(buffer)); | 
 | 1130 |  | 
 | 1131 |     return repr; | 
 | 1132 | } | 
 | 1133 |  | 
 | 1134 | static PyObject* | 
 | 1135 | element_set(ElementObject* self, PyObject* args) | 
 | 1136 | { | 
 | 1137 |     PyObject* attrib; | 
 | 1138 |  | 
 | 1139 |     PyObject* key; | 
 | 1140 |     PyObject* value; | 
 | 1141 |     if (!PyArg_ParseTuple(args, "OO:set", &key, &value)) | 
 | 1142 |         return NULL; | 
 | 1143 |  | 
 | 1144 |     if (!self->extra) | 
 | 1145 |         element_new_extra(self, NULL); | 
 | 1146 |  | 
 | 1147 |     attrib = element_get_attrib(self); | 
 | 1148 |     if (!attrib) | 
 | 1149 |         return NULL; | 
 | 1150 |  | 
 | 1151 |     if (PyDict_SetItem(attrib, key, value) < 0) | 
 | 1152 |         return NULL; | 
 | 1153 |  | 
 | 1154 |     Py_RETURN_NONE; | 
 | 1155 | } | 
 | 1156 |  | 
 | 1157 | static int | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1158 | element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1159 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1160 |     ElementObject* self = (ElementObject*) self_; | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 1161 |     Py_ssize_t i, new, old; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1162 |     PyObject* recycle = NULL; | 
 | 1163 |  | 
 | 1164 |     if (!self->extra) | 
 | 1165 |         element_new_extra(self, NULL); | 
 | 1166 |  | 
 | 1167 |     /* standard clamping */ | 
 | 1168 |     if (start < 0) | 
 | 1169 |         start = 0; | 
 | 1170 |     if (end < 0) | 
 | 1171 |         end = 0; | 
 | 1172 |     if (end > self->extra->length) | 
 | 1173 |         end = self->extra->length; | 
 | 1174 |     if (start > end) | 
 | 1175 |         start = end; | 
 | 1176 |  | 
 | 1177 |     old = end - start; | 
 | 1178 |  | 
 | 1179 |     if (item == NULL) | 
 | 1180 |         new = 0; | 
 | 1181 |     else if (PyList_CheckExact(item)) { | 
 | 1182 |         new = PyList_GET_SIZE(item); | 
 | 1183 |     } else { | 
 | 1184 |         /* FIXME: support arbitrary sequences? */ | 
 | 1185 |         PyErr_Format( | 
 | 1186 |             PyExc_TypeError, | 
 | 1187 |             "expected list, not \"%.200s\"", item->ob_type->tp_name | 
 | 1188 |             ); | 
 | 1189 |         return -1; | 
 | 1190 |     } | 
 | 1191 |  | 
 | 1192 |     if (old > 0) { | 
 | 1193 |         /* to avoid recursive calls to this method (via decref), move | 
 | 1194 |            old items to the recycle bin here, and get rid of them when | 
 | 1195 |            we're done modifying the element */ | 
 | 1196 |         recycle = PyList_New(old); | 
 | 1197 |         for (i = 0; i < old; i++) | 
 | 1198 |             PyList_SET_ITEM(recycle, i, self->extra->children[i + start]); | 
 | 1199 |     } | 
 | 1200 |  | 
 | 1201 |     if (new < old) { | 
 | 1202 |         /* delete slice */ | 
 | 1203 |         for (i = end; i < self->extra->length; i++) | 
 | 1204 |             self->extra->children[i + new - old] = self->extra->children[i]; | 
 | 1205 |     } else if (new > old) { | 
 | 1206 |         /* insert slice */ | 
 | 1207 |         if (element_resize(self, new - old) < 0) | 
 | 1208 |             return -1; | 
 | 1209 |         for (i = self->extra->length-1; i >= end; i--) | 
 | 1210 |             self->extra->children[i + new - old] = self->extra->children[i]; | 
 | 1211 |     } | 
 | 1212 |  | 
 | 1213 |     /* replace the slice */ | 
 | 1214 |     for (i = 0; i < new; i++) { | 
 | 1215 |         PyObject* element = PyList_GET_ITEM(item, i); | 
 | 1216 |         Py_INCREF(element); | 
 | 1217 |         self->extra->children[i + start] = element; | 
 | 1218 |     } | 
 | 1219 |  | 
 | 1220 |     self->extra->length += new - old; | 
 | 1221 |  | 
 | 1222 |     /* discard the recycle bin, and everything in it */ | 
 | 1223 |     Py_XDECREF(recycle); | 
 | 1224 |  | 
 | 1225 |     return 0; | 
 | 1226 | } | 
 | 1227 |  | 
 | 1228 | static int | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1229 | element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1230 | { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1231 |     ElementObject* self = (ElementObject*) self_; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1232 |     int i; | 
 | 1233 |     PyObject* old; | 
 | 1234 |  | 
 | 1235 |     if (!self->extra || index < 0 || index >= self->extra->length) { | 
 | 1236 |         PyErr_SetString( | 
 | 1237 |             PyExc_IndexError, | 
 | 1238 |             "child assignment index out of range"); | 
 | 1239 |         return -1; | 
 | 1240 |     } | 
 | 1241 |  | 
 | 1242 |     old = self->extra->children[index]; | 
 | 1243 |  | 
 | 1244 |     if (item) { | 
 | 1245 |         Py_INCREF(item); | 
 | 1246 |         self->extra->children[index] = item; | 
 | 1247 |     } else { | 
 | 1248 |         self->extra->length--; | 
 | 1249 |         for (i = index; i < self->extra->length; i++) | 
 | 1250 |             self->extra->children[i] = self->extra->children[i+1]; | 
 | 1251 |     } | 
 | 1252 |  | 
 | 1253 |     Py_DECREF(old); | 
 | 1254 |  | 
 | 1255 |     return 0; | 
 | 1256 | } | 
 | 1257 |  | 
/* Method table for Element objects.  element_getattr searches this
   table (via Py_FindMethod) before falling back to the data
   attributes.  All entries use the METH_VARARGS calling convention. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    /* attribute dictionary access */
    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    /* child lookup by tag or path */
    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    /* child list modification */
    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    /* sentinel */
    {NULL, NULL}
};
 | 1298 |  | 
 | 1299 | static PyObject*   | 
 | 1300 | element_getattr(ElementObject* self, char* name) | 
 | 1301 | { | 
 | 1302 |     PyObject* res; | 
 | 1303 |  | 
 | 1304 |     res = Py_FindMethod(element_methods, (PyObject*) self, name); | 
 | 1305 |     if (res) | 
 | 1306 | 	return res; | 
 | 1307 |  | 
 | 1308 |     PyErr_Clear(); | 
 | 1309 |  | 
 | 1310 |     if (strcmp(name, "tag") == 0) | 
 | 1311 | 	res = self->tag; | 
 | 1312 |     else if (strcmp(name, "text") == 0) | 
 | 1313 |         res = element_get_text(self); | 
 | 1314 |     else if (strcmp(name, "tail") == 0) { | 
 | 1315 |         res = element_get_tail(self); | 
 | 1316 |     } else if (strcmp(name, "attrib") == 0) { | 
 | 1317 |         if (!self->extra) | 
 | 1318 |             element_new_extra(self, NULL); | 
 | 1319 | 	res = element_get_attrib(self); | 
 | 1320 |     } else { | 
 | 1321 |         PyErr_SetString(PyExc_AttributeError, name); | 
 | 1322 |         return NULL; | 
 | 1323 |     } | 
 | 1324 |  | 
 | 1325 |     if (!res) | 
 | 1326 |         return NULL; | 
 | 1327 |  | 
 | 1328 |     Py_INCREF(res); | 
 | 1329 |     return res; | 
 | 1330 | } | 
 | 1331 |  | 
 | 1332 | static int | 
 | 1333 | element_setattr(ElementObject* self, const char* name, PyObject* value) | 
 | 1334 | { | 
 | 1335 |     if (value == NULL) { | 
 | 1336 |         PyErr_SetString( | 
 | 1337 |             PyExc_AttributeError, | 
 | 1338 |             "can't delete element attributes" | 
 | 1339 |             ); | 
 | 1340 |         return -1; | 
 | 1341 |     } | 
 | 1342 |  | 
 | 1343 |     if (strcmp(name, "tag") == 0) { | 
 | 1344 |         Py_DECREF(self->tag); | 
 | 1345 |         self->tag = value; | 
 | 1346 |         Py_INCREF(self->tag); | 
 | 1347 |     } else if (strcmp(name, "text") == 0) { | 
 | 1348 |         Py_DECREF(JOIN_OBJ(self->text)); | 
 | 1349 |         self->text = value; | 
 | 1350 |         Py_INCREF(self->text); | 
 | 1351 |     } else if (strcmp(name, "tail") == 0) { | 
 | 1352 |         Py_DECREF(JOIN_OBJ(self->tail)); | 
 | 1353 |         self->tail = value; | 
 | 1354 |         Py_INCREF(self->tail); | 
 | 1355 |     } else if (strcmp(name, "attrib") == 0) { | 
 | 1356 |         if (!self->extra) | 
 | 1357 |             element_new_extra(self, NULL); | 
 | 1358 |         Py_DECREF(self->extra->attrib); | 
 | 1359 |         self->extra->attrib = value; | 
 | 1360 |         Py_INCREF(self->extra->attrib); | 
 | 1361 |     } else { | 
 | 1362 |         PyErr_SetString(PyExc_AttributeError, name); | 
 | 1363 |         return -1; | 
 | 1364 |     } | 
 | 1365 |  | 
 | 1366 |     return 0; | 
 | 1367 | } | 
 | 1368 |  | 
/* Sequence protocol slots for Element: an element acts as a sequence
   of its child elements. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,
    element_getslice,
    element_setitem,
    element_setslice,
};
 | 1378 |  | 
/* Type object for Element.  Only the slots up to tp_as_sequence are
   listed; attribute access goes through the tp_getattr/tp_setattr
   handlers above. */
static PyTypeObject Element_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc, /* tp_dealloc */
    0, /* tp_print */
    (getattrfunc)element_getattr, /* tp_getattr */
    (setattrfunc)element_setattr, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)element_repr, /* tp_repr */
    0, /* tp_as_number */
    &element_as_sequence, /* tp_as_sequence */
};
 | 1392 |  | 
 | 1393 | /* ==================================================================== */ | 
 | 1394 | /* the tree builder type */ | 
 | 1395 |  | 
/* State of a tree builder.  The "this" and "last" fields hold Py_None
   (not NULL) until the first element has been started. */
typedef struct {
    PyObject_HEAD

    PyObject* root; /* root node (first created node) */

    ElementObject* this; /* current node */
    ElementObject* last; /* most recently created node */

    PyObject* data; /* data collector (string or list), or NULL */

    PyObject* stack; /* element stack */
    Py_ssize_t index; /* current stack size (0=empty) */

    /* element tracing */
    PyObject* events; /* list of events, or NULL if not collecting */
    PyObject* start_event_obj; /* event objects (NULL to ignore) */
    PyObject* end_event_obj;
    PyObject* start_ns_event_obj;
    PyObject* end_ns_event_obj;

} TreeBuilderObject;
 | 1417 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 1418 | static PyTypeObject TreeBuilder_Type; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1419 |  | 
 | 1420 | #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type) | 
 | 1421 |  | 
 | 1422 | /* -------------------------------------------------------------------- */ | 
 | 1423 | /* constructor and destructor */ | 
 | 1424 |  | 
 | 1425 | LOCAL(PyObject*) | 
 | 1426 | treebuilder_new(void) | 
 | 1427 | { | 
 | 1428 |     TreeBuilderObject* self; | 
 | 1429 |  | 
 | 1430 |     self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type); | 
 | 1431 |     if (self == NULL) | 
 | 1432 |         return NULL; | 
 | 1433 |  | 
 | 1434 |     self->root = NULL; | 
 | 1435 |  | 
 | 1436 |     Py_INCREF(Py_None); | 
 | 1437 |     self->this = (ElementObject*) Py_None; | 
 | 1438 |  | 
 | 1439 |     Py_INCREF(Py_None); | 
 | 1440 |     self->last = (ElementObject*) Py_None; | 
 | 1441 |  | 
 | 1442 |     self->data = NULL; | 
 | 1443 |  | 
 | 1444 |     self->stack = PyList_New(20); | 
 | 1445 |     self->index = 0; | 
 | 1446 |  | 
 | 1447 |     self->events = NULL; | 
 | 1448 |     self->start_event_obj = self->end_event_obj = NULL; | 
 | 1449 |     self->start_ns_event_obj = self->end_ns_event_obj = NULL; | 
 | 1450 |  | 
 | 1451 |     ALLOC(sizeof(TreeBuilderObject), "create treebuilder"); | 
 | 1452 |  | 
 | 1453 |     return (PyObject*) self; | 
 | 1454 | } | 
 | 1455 |  | 
 | 1456 | static PyObject* | 
| Thomas Wouters | 73e5a5b | 2006-06-08 15:35:45 +0000 | [diff] [blame] | 1457 | treebuilder(PyObject* self_, PyObject* args) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1458 | { | 
 | 1459 |     if (!PyArg_ParseTuple(args, ":TreeBuilder")) | 
 | 1460 |         return NULL; | 
 | 1461 |  | 
 | 1462 |     return treebuilder_new(); | 
 | 1463 | } | 
 | 1464 |  | 
/* Destructor: release every reference held by the builder, then free
   the object itself. */
static void
treebuilder_dealloc(TreeBuilderObject* self)
{
    /* these fields may be NULL, hence XDECREF */
    Py_XDECREF(self->end_ns_event_obj);
    Py_XDECREF(self->start_ns_event_obj);
    Py_XDECREF(self->end_event_obj);
    Py_XDECREF(self->start_event_obj);
    Py_XDECREF(self->events);
    /* stack, last and this are released with plain DECREF, i.e. they
       are expected to be non-NULL here */
    Py_DECREF(self->stack);
    Py_XDECREF(self->data);
    Py_DECREF(self->last);
    Py_DECREF(self->this);
    Py_XDECREF(self->root);

    RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");

    PyObject_Del(self);
}
 | 1483 |  | 
 | 1484 | /* -------------------------------------------------------------------- */ | 
 | 1485 | /* handlers */ | 
 | 1486 |  | 
 | 1487 | LOCAL(PyObject*) | 
 | 1488 | treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, | 
 | 1489 |                        PyObject* standalone) | 
 | 1490 | { | 
 | 1491 |     Py_RETURN_NONE; | 
 | 1492 | } | 
 | 1493 |  | 
 | 1494 | LOCAL(PyObject*) | 
 | 1495 | treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, | 
 | 1496 |                          PyObject* attrib) | 
 | 1497 | { | 
 | 1498 |     PyObject* node; | 
 | 1499 |     PyObject* this; | 
 | 1500 |  | 
 | 1501 |     if (self->data) { | 
 | 1502 |         if (self->this == self->last) { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1503 |             Py_DECREF(JOIN_OBJ(self->last->text)); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1504 |             self->last->text = JOIN_SET( | 
 | 1505 |                 self->data, PyList_CheckExact(self->data) | 
 | 1506 |                 ); | 
 | 1507 |         } else { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1508 |             Py_DECREF(JOIN_OBJ(self->last->tail)); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1509 |             self->last->tail = JOIN_SET( | 
 | 1510 |                 self->data, PyList_CheckExact(self->data) | 
 | 1511 |                 ); | 
 | 1512 |         } | 
 | 1513 |         self->data = NULL; | 
 | 1514 |     } | 
 | 1515 |  | 
 | 1516 |     node = element_new(tag, attrib); | 
 | 1517 |     if (!node) | 
 | 1518 |         return NULL; | 
 | 1519 |  | 
 | 1520 |     this = (PyObject*) self->this; | 
 | 1521 |  | 
 | 1522 |     if (this != Py_None) { | 
 | 1523 |         if (element_add_subelement((ElementObject*) this, node) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1524 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1525 |     } else { | 
 | 1526 |         if (self->root) { | 
 | 1527 |             PyErr_SetString( | 
 | 1528 |                 PyExc_SyntaxError, | 
 | 1529 |                 "multiple elements on top level" | 
 | 1530 |                 ); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1531 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1532 |         } | 
 | 1533 |         Py_INCREF(node); | 
 | 1534 |         self->root = node; | 
 | 1535 |     } | 
 | 1536 |  | 
 | 1537 |     if (self->index < PyList_GET_SIZE(self->stack)) { | 
 | 1538 |         if (PyList_SetItem(self->stack, self->index, this) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1539 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1540 |         Py_INCREF(this); | 
 | 1541 |     } else { | 
 | 1542 |         if (PyList_Append(self->stack, this) < 0) | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1543 |             goto error; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1544 |     } | 
 | 1545 |     self->index++; | 
 | 1546 |  | 
 | 1547 |     Py_DECREF(this); | 
 | 1548 |     Py_INCREF(node); | 
 | 1549 |     self->this = (ElementObject*) node; | 
 | 1550 |  | 
 | 1551 |     Py_DECREF(self->last); | 
 | 1552 |     Py_INCREF(node); | 
 | 1553 |     self->last = (ElementObject*) node; | 
 | 1554 |  | 
 | 1555 |     if (self->start_event_obj) { | 
 | 1556 |         PyObject* res; | 
 | 1557 |         PyObject* action = self->start_event_obj; | 
 | 1558 |         res = PyTuple_New(2); | 
 | 1559 |         if (res) { | 
 | 1560 |             Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); | 
 | 1561 |             Py_INCREF(node);   PyTuple_SET_ITEM(res, 1, (PyObject*) node); | 
 | 1562 |             PyList_Append(self->events, res); | 
 | 1563 |             Py_DECREF(res); | 
 | 1564 |         } else | 
 | 1565 |             PyErr_Clear(); /* FIXME: propagate error */ | 
 | 1566 |     } | 
 | 1567 |  | 
 | 1568 |     return node; | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1569 |  | 
 | 1570 |   error: | 
 | 1571 |     Py_DECREF(node); | 
 | 1572 |     return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1573 | } | 
 | 1574 |  | 
 | 1575 | LOCAL(PyObject*) | 
 | 1576 | treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) | 
 | 1577 | { | 
 | 1578 |     if (!self->data) { | 
| Thomas Wouters | 00ee7ba | 2006-08-21 19:07:27 +0000 | [diff] [blame] | 1579 |         if (self->last == (ElementObject*) Py_None) { | 
 | 1580 |             /* ignore calls to data before the first call to start */ | 
 | 1581 |             Py_RETURN_NONE; | 
 | 1582 |         } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1583 |         /* store the first item as is */ | 
 | 1584 |         Py_INCREF(data); self->data = data; | 
 | 1585 |     } else { | 
 | 1586 |         /* more than one item; use a list to collect items */ | 
 | 1587 |         if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 && | 
 | 1588 |             PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) { | 
 | 1589 |             /* expat often generates single character data sections; handle | 
 | 1590 |                the most common case by resizing the existing string... */ | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 1591 |             Py_ssize_t size = PyString_GET_SIZE(self->data); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1592 |             if (_PyString_Resize(&self->data, size + 1) < 0) | 
 | 1593 |                 return NULL; | 
 | 1594 |             PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0]; | 
 | 1595 |         } else if (PyList_CheckExact(self->data)) { | 
 | 1596 |             if (PyList_Append(self->data, data) < 0) | 
 | 1597 |                 return NULL; | 
 | 1598 |         } else { | 
 | 1599 |             PyObject* list = PyList_New(2); | 
 | 1600 |             if (!list) | 
 | 1601 |                 return NULL; | 
 | 1602 |             PyList_SET_ITEM(list, 0, self->data); | 
 | 1603 |             Py_INCREF(data); PyList_SET_ITEM(list, 1, data); | 
 | 1604 |             self->data = list; | 
 | 1605 |         } | 
 | 1606 |     } | 
 | 1607 |  | 
 | 1608 |     Py_RETURN_NONE; | 
 | 1609 | } | 
 | 1610 |  | 
 | 1611 | LOCAL(PyObject*) | 
 | 1612 | treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) | 
 | 1613 | { | 
 | 1614 |     PyObject* item; | 
 | 1615 |  | 
 | 1616 |     if (self->data) { | 
 | 1617 |         if (self->this == self->last) { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1618 |             Py_DECREF(JOIN_OBJ(self->last->text)); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1619 |             self->last->text = JOIN_SET( | 
 | 1620 |                 self->data, PyList_CheckExact(self->data) | 
 | 1621 |                 ); | 
 | 1622 |         } else { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 1623 |             Py_DECREF(JOIN_OBJ(self->last->tail)); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1624 |             self->last->tail = JOIN_SET( | 
 | 1625 |                 self->data, PyList_CheckExact(self->data) | 
 | 1626 |                 ); | 
 | 1627 |         } | 
 | 1628 |         self->data = NULL; | 
 | 1629 |     } | 
 | 1630 |  | 
 | 1631 |     if (self->index == 0) { | 
 | 1632 |         PyErr_SetString( | 
 | 1633 |             PyExc_IndexError, | 
 | 1634 |             "pop from empty stack" | 
 | 1635 |             ); | 
 | 1636 |         return NULL; | 
 | 1637 |     } | 
 | 1638 |  | 
 | 1639 |     self->index--; | 
 | 1640 |  | 
 | 1641 |     item = PyList_GET_ITEM(self->stack, self->index); | 
 | 1642 |     Py_INCREF(item); | 
 | 1643 |  | 
 | 1644 |     Py_DECREF(self->last); | 
 | 1645 |  | 
 | 1646 |     self->last = (ElementObject*) self->this; | 
 | 1647 |     self->this = (ElementObject*) item; | 
 | 1648 |  | 
 | 1649 |     if (self->end_event_obj) { | 
 | 1650 |         PyObject* res; | 
 | 1651 |         PyObject* action = self->end_event_obj; | 
 | 1652 |         PyObject* node = (PyObject*) self->last; | 
 | 1653 |         res = PyTuple_New(2); | 
 | 1654 |         if (res) { | 
 | 1655 |             Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action); | 
 | 1656 |             Py_INCREF(node);   PyTuple_SET_ITEM(res, 1, (PyObject*) node); | 
 | 1657 |             PyList_Append(self->events, res); | 
 | 1658 |             Py_DECREF(res); | 
 | 1659 |         } else | 
 | 1660 |             PyErr_Clear(); /* FIXME: propagate error */ | 
 | 1661 |     } | 
 | 1662 |  | 
 | 1663 |     Py_INCREF(self->last); | 
 | 1664 |     return (PyObject*) self->last; | 
 | 1665 | } | 
 | 1666 |  | 
 | 1667 | LOCAL(void) | 
 | 1668 | treebuilder_handle_namespace(TreeBuilderObject* self, int start, | 
 | 1669 |                              const char* prefix, const char *uri) | 
 | 1670 | { | 
 | 1671 |     PyObject* res; | 
 | 1672 |     PyObject* action; | 
 | 1673 |     PyObject* parcel; | 
 | 1674 |  | 
 | 1675 |     if (!self->events) | 
 | 1676 |         return; | 
 | 1677 |  | 
 | 1678 |     if (start) { | 
 | 1679 |         if (!self->start_ns_event_obj) | 
 | 1680 |             return; | 
 | 1681 |         action = self->start_ns_event_obj; | 
 | 1682 |         /* FIXME: prefix and uri use utf-8 encoding! */ | 
 | 1683 |         parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri); | 
 | 1684 |         if (!parcel) | 
 | 1685 |             return; | 
 | 1686 |         Py_INCREF(action); | 
 | 1687 |     } else { | 
 | 1688 |         if (!self->end_ns_event_obj) | 
 | 1689 |             return; | 
 | 1690 |         action = self->end_ns_event_obj; | 
 | 1691 |         Py_INCREF(action); | 
 | 1692 |         parcel = Py_None; | 
 | 1693 |         Py_INCREF(parcel); | 
 | 1694 |     } | 
 | 1695 |  | 
 | 1696 |     res = PyTuple_New(2); | 
 | 1697 |  | 
 | 1698 |     if (res) { | 
 | 1699 |         PyTuple_SET_ITEM(res, 0, action); | 
 | 1700 |         PyTuple_SET_ITEM(res, 1, parcel); | 
 | 1701 |         PyList_Append(self->events, res); | 
 | 1702 |         Py_DECREF(res); | 
 | 1703 |     } else | 
 | 1704 |         PyErr_Clear(); /* FIXME: propagate error */ | 
 | 1705 | } | 
 | 1706 |  | 
 | 1707 | /* -------------------------------------------------------------------- */ | 
 | 1708 | /* methods (in alphabetical order) */ | 
 | 1709 |  | 
 | 1710 | static PyObject* | 
 | 1711 | treebuilder_data(TreeBuilderObject* self, PyObject* args) | 
 | 1712 | { | 
 | 1713 |     PyObject* data; | 
 | 1714 |     if (!PyArg_ParseTuple(args, "O:data", &data)) | 
 | 1715 |         return NULL; | 
 | 1716 |  | 
 | 1717 |     return treebuilder_handle_data(self, data); | 
 | 1718 | } | 
 | 1719 |  | 
 | 1720 | static PyObject* | 
 | 1721 | treebuilder_end(TreeBuilderObject* self, PyObject* args) | 
 | 1722 | { | 
 | 1723 |     PyObject* tag; | 
 | 1724 |     if (!PyArg_ParseTuple(args, "O:end", &tag)) | 
 | 1725 |         return NULL; | 
 | 1726 |  | 
 | 1727 |     return treebuilder_handle_end(self, tag); | 
 | 1728 | } | 
 | 1729 |  | 
 | 1730 | LOCAL(PyObject*) | 
 | 1731 | treebuilder_done(TreeBuilderObject* self) | 
 | 1732 | { | 
 | 1733 |     PyObject* res; | 
 | 1734 |  | 
 | 1735 |     /* FIXME: check stack size? */ | 
 | 1736 |  | 
 | 1737 |     if (self->root) | 
 | 1738 |         res = self->root; | 
 | 1739 |     else | 
 | 1740 |         res = Py_None; | 
 | 1741 |  | 
 | 1742 |     Py_INCREF(res); | 
 | 1743 |     return res; | 
 | 1744 | } | 
 | 1745 |  | 
 | 1746 | static PyObject* | 
 | 1747 | treebuilder_close(TreeBuilderObject* self, PyObject* args) | 
 | 1748 | { | 
 | 1749 |     if (!PyArg_ParseTuple(args, ":close")) | 
 | 1750 |         return NULL; | 
 | 1751 |  | 
 | 1752 |     return treebuilder_done(self); | 
 | 1753 | } | 
 | 1754 |  | 
 | 1755 | static PyObject* | 
 | 1756 | treebuilder_start(TreeBuilderObject* self, PyObject* args) | 
 | 1757 | { | 
 | 1758 |     PyObject* tag; | 
 | 1759 |     PyObject* attrib = Py_None; | 
 | 1760 |     if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib)) | 
 | 1761 |         return NULL; | 
 | 1762 |  | 
 | 1763 |     return treebuilder_handle_start(self, tag, attrib); | 
 | 1764 | } | 
 | 1765 |  | 
 | 1766 | static PyObject* | 
 | 1767 | treebuilder_xml(TreeBuilderObject* self, PyObject* args) | 
 | 1768 | { | 
 | 1769 |     PyObject* encoding; | 
 | 1770 |     PyObject* standalone; | 
 | 1771 |     if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) | 
 | 1772 |         return NULL; | 
 | 1773 |  | 
 | 1774 |     return treebuilder_handle_xml(self, encoding, standalone); | 
 | 1775 | } | 
 | 1776 |  | 
 | 1777 | static PyMethodDef treebuilder_methods[] = { | 
 | 1778 |     {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, | 
 | 1779 |     {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, | 
 | 1780 |     {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, | 
 | 1781 |     {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, | 
 | 1782 |     {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, | 
 | 1783 |     {NULL, NULL} | 
 | 1784 | }; | 
 | 1785 |  | 
 | 1786 | static PyObject*   | 
 | 1787 | treebuilder_getattr(TreeBuilderObject* self, char* name) | 
 | 1788 | { | 
 | 1789 |     return Py_FindMethod(treebuilder_methods, (PyObject*) self, name); | 
 | 1790 | } | 
 | 1791 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 1792 | static PyTypeObject TreeBuilder_Type = { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1793 |     PyObject_HEAD_INIT(NULL) | 
 | 1794 |     0, "TreeBuilder", sizeof(TreeBuilderObject), 0, | 
 | 1795 |     /* methods */ | 
 | 1796 |     (destructor)treebuilder_dealloc, /* tp_dealloc */ | 
 | 1797 |     0, /* tp_print */ | 
 | 1798 |     (getattrfunc)treebuilder_getattr, /* tp_getattr */ | 
 | 1799 | }; | 
 | 1800 |  | 
 | 1801 | /* ==================================================================== */ | 
 | 1802 | /* the expat interface */ | 
 | 1803 |  | 
 | 1804 | #if defined(USE_EXPAT) | 
 | 1805 |  | 
 | 1806 | #include "expat.h" | 
 | 1807 |  | 
 | 1808 | #if defined(USE_PYEXPAT_CAPI) | 
 | 1809 | #include "pyexpat.h" | 
 | 1810 | static struct PyExpat_CAPI* expat_capi; | 
 | 1811 | #define EXPAT(func) (expat_capi->func) | 
 | 1812 | #else | 
 | 1813 | #define EXPAT(func) (XML_##func) | 
 | 1814 | #endif | 
 | 1815 |  | 
 | 1816 | typedef struct { | 
 | 1817 |     PyObject_HEAD | 
 | 1818 |  | 
 | 1819 |     XML_Parser parser; | 
 | 1820 |  | 
 | 1821 |     PyObject* target; | 
 | 1822 |     PyObject* entity; | 
 | 1823 |  | 
 | 1824 |     PyObject* names; | 
 | 1825 |  | 
 | 1826 |     PyObject* handle_xml; | 
 | 1827 |     PyObject* handle_start; | 
 | 1828 |     PyObject* handle_data; | 
 | 1829 |     PyObject* handle_end; | 
 | 1830 |  | 
 | 1831 |     PyObject* handle_comment; | 
 | 1832 |     PyObject* handle_pi; | 
 | 1833 |  | 
 | 1834 | } XMLParserObject; | 
 | 1835 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 1836 | static PyTypeObject XMLParser_Type; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1837 |  | 
 | 1838 | /* helpers */ | 
 | 1839 |  | 
/* Scan the first size bytes of string.  Returns 1 as soon as a byte with
   the high bit set is found (i.e. the data is not plain 7-bit ascii),
   and 0 if there is no such byte. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1; /* non-ascii byte */
        pos++;
    }

    return 0;
}
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1852 |  | 
 | 1853 | LOCAL(PyObject*) | 
 | 1854 | makestring(const char* string, int size) | 
 | 1855 | { | 
 | 1856 |     /* convert a UTF-8 string to either a 7-bit ascii string or a | 
 | 1857 |        Unicode string */ | 
 | 1858 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1859 |     if (checkstring(string, size)) | 
 | 1860 |         return PyUnicode_DecodeUTF8(string, size, "strict"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1861 |  | 
 | 1862 |     return PyString_FromStringAndSize(string, size); | 
 | 1863 | } | 
 | 1864 |  | 
 | 1865 | LOCAL(PyObject*) | 
 | 1866 | makeuniversal(XMLParserObject* self, const char* string) | 
 | 1867 | { | 
 | 1868 |     /* convert a UTF-8 tag/attribute name from the expat parser | 
 | 1869 |        to a universal name string */ | 
 | 1870 |  | 
 | 1871 |     int size = strlen(string); | 
 | 1872 |     PyObject* key; | 
 | 1873 |     PyObject* value; | 
 | 1874 |  | 
 | 1875 |     /* look the 'raw' name up in the names dictionary */ | 
 | 1876 |     key = PyString_FromStringAndSize(string, size); | 
 | 1877 |     if (!key) | 
 | 1878 |         return NULL; | 
 | 1879 |  | 
 | 1880 |     value = PyDict_GetItem(self->names, key); | 
 | 1881 |  | 
 | 1882 |     if (value) { | 
 | 1883 |         Py_INCREF(value); | 
 | 1884 |     } else { | 
 | 1885 |         /* new name.  convert to universal name, and decode as | 
 | 1886 |            necessary */ | 
 | 1887 |  | 
 | 1888 |         PyObject* tag; | 
 | 1889 |         char* p; | 
 | 1890 |         int i; | 
 | 1891 |  | 
 | 1892 |         /* look for namespace separator */ | 
 | 1893 |         for (i = 0; i < size; i++) | 
 | 1894 |             if (string[i] == '}') | 
 | 1895 |                 break; | 
 | 1896 |         if (i != size) { | 
 | 1897 |             /* convert to universal name */ | 
 | 1898 |             tag = PyString_FromStringAndSize(NULL, size+1); | 
 | 1899 |             p = PyString_AS_STRING(tag); | 
 | 1900 |             p[0] = '{'; | 
 | 1901 |             memcpy(p+1, string, size); | 
 | 1902 |             size++; | 
 | 1903 |         } else { | 
 | 1904 |             /* plain name; use key as tag */ | 
 | 1905 |             Py_INCREF(key); | 
 | 1906 |             tag = key; | 
 | 1907 |         } | 
 | 1908 |          | 
 | 1909 |         /* decode universal name */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1910 |         /* inline makestring, to avoid duplicating the source string if | 
 | 1911 |            it's not an utf-8 string */ | 
 | 1912 |         p = PyString_AS_STRING(tag); | 
 | 1913 |         if (checkstring(p, size)) { | 
 | 1914 |             value = PyUnicode_DecodeUTF8(p, size, "strict"); | 
 | 1915 |             Py_DECREF(tag); | 
 | 1916 |             if (!value) { | 
 | 1917 |                 Py_DECREF(key); | 
 | 1918 |                 return NULL; | 
 | 1919 |             } | 
 | 1920 |         } else | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1921 |             value = tag; /* use tag as is */ | 
 | 1922 |  | 
 | 1923 |         /* add to names dictionary */ | 
 | 1924 |         if (PyDict_SetItem(self->names, key, value) < 0) { | 
 | 1925 |             Py_DECREF(key); | 
 | 1926 |             Py_DECREF(value); | 
 | 1927 |             return NULL; | 
 | 1928 |         } | 
 | 1929 |     } | 
 | 1930 |  | 
 | 1931 |     Py_DECREF(key); | 
 | 1932 |     return value; | 
 | 1933 | } | 
 | 1934 |  | 
 | 1935 | /* -------------------------------------------------------------------- */ | 
 | 1936 | /* handlers */ | 
 | 1937 |  | 
 | 1938 | static void | 
 | 1939 | expat_default_handler(XMLParserObject* self, const XML_Char* data_in, | 
 | 1940 |                       int data_len) | 
 | 1941 | { | 
 | 1942 |     PyObject* key; | 
 | 1943 |     PyObject* value; | 
 | 1944 |     PyObject* res; | 
 | 1945 |  | 
 | 1946 |     if (data_len < 2 || data_in[0] != '&') | 
 | 1947 |         return; | 
 | 1948 |  | 
 | 1949 |     key = makestring(data_in + 1, data_len - 2); | 
 | 1950 |     if (!key) | 
 | 1951 |         return; | 
 | 1952 |  | 
 | 1953 |     value = PyDict_GetItem(self->entity, key); | 
 | 1954 |  | 
 | 1955 |     if (value) { | 
 | 1956 |         if (TreeBuilder_CheckExact(self->target)) | 
 | 1957 |             res = treebuilder_handle_data( | 
 | 1958 |                 (TreeBuilderObject*) self->target, value | 
 | 1959 |                 ); | 
 | 1960 |         else if (self->handle_data) | 
 | 1961 |             res = PyObject_CallFunction(self->handle_data, "O", value); | 
 | 1962 |         else | 
 | 1963 |             res = NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1964 |         Py_XDECREF(res); | 
 | 1965 |     } else { | 
 | 1966 |         PyErr_Format( | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 1967 |             PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld", | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 1968 |             PyString_AS_STRING(key), | 
 | 1969 |             EXPAT(GetErrorLineNumber)(self->parser), | 
 | 1970 |             EXPAT(GetErrorColumnNumber)(self->parser) | 
 | 1971 |             ); | 
 | 1972 |     } | 
 | 1973 |  | 
 | 1974 |     Py_DECREF(key); | 
 | 1975 | } | 
 | 1976 |  | 
 | 1977 | static void | 
 | 1978 | expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, | 
 | 1979 |                     const XML_Char **attrib_in) | 
 | 1980 | { | 
 | 1981 |     PyObject* res; | 
 | 1982 |     PyObject* tag; | 
 | 1983 |     PyObject* attrib; | 
 | 1984 |     int ok; | 
 | 1985 |  | 
 | 1986 |     /* tag name */ | 
 | 1987 |     tag = makeuniversal(self, tag_in); | 
 | 1988 |     if (!tag) | 
 | 1989 |         return; /* parser will look for errors */ | 
 | 1990 |  | 
 | 1991 |     /* attributes */ | 
 | 1992 |     if (attrib_in[0]) { | 
 | 1993 |         attrib = PyDict_New(); | 
 | 1994 |         if (!attrib) | 
 | 1995 |             return; | 
 | 1996 |         while (attrib_in[0] && attrib_in[1]) { | 
 | 1997 |             PyObject* key = makeuniversal(self, attrib_in[0]); | 
 | 1998 |             PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1])); | 
 | 1999 |             if (!key || !value) { | 
 | 2000 |                 Py_XDECREF(value); | 
 | 2001 |                 Py_XDECREF(key); | 
 | 2002 |                 Py_DECREF(attrib); | 
 | 2003 |                 return; | 
 | 2004 |             } | 
 | 2005 |             ok = PyDict_SetItem(attrib, key, value); | 
 | 2006 |             Py_DECREF(value); | 
 | 2007 |             Py_DECREF(key); | 
 | 2008 |             if (ok < 0) { | 
 | 2009 |                 Py_DECREF(attrib); | 
 | 2010 |                 return; | 
 | 2011 |             } | 
 | 2012 |             attrib_in += 2; | 
 | 2013 |         } | 
 | 2014 |     } else { | 
 | 2015 |         Py_INCREF(Py_None); | 
 | 2016 |         attrib = Py_None; | 
 | 2017 |     } | 
 | 2018 |  | 
 | 2019 |     if (TreeBuilder_CheckExact(self->target)) | 
 | 2020 |         /* shortcut */ | 
 | 2021 |         res = treebuilder_handle_start((TreeBuilderObject*) self->target, | 
 | 2022 |                                        tag, attrib); | 
 | 2023 |     else if (self->handle_start) | 
 | 2024 |         res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib); | 
 | 2025 |     else | 
 | 2026 |         res = NULL; | 
 | 2027 |  | 
 | 2028 |     Py_DECREF(tag); | 
 | 2029 |     Py_DECREF(attrib); | 
 | 2030 |  | 
 | 2031 |     Py_XDECREF(res); | 
 | 2032 | } | 
 | 2033 |  | 
 | 2034 | static void | 
 | 2035 | expat_data_handler(XMLParserObject* self, const XML_Char* data_in, | 
 | 2036 |                    int data_len) | 
 | 2037 | { | 
 | 2038 |     PyObject* data; | 
 | 2039 |     PyObject* res; | 
 | 2040 |  | 
 | 2041 |     data = makestring(data_in, data_len); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2042 |     if (!data) | 
 | 2043 |         return; /* parser will look for errors */ | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2044 |  | 
 | 2045 |     if (TreeBuilder_CheckExact(self->target)) | 
 | 2046 |         /* shortcut */ | 
 | 2047 |         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); | 
 | 2048 |     else if (self->handle_data) | 
 | 2049 |         res = PyObject_CallFunction(self->handle_data, "O", data); | 
 | 2050 |     else | 
 | 2051 |         res = NULL; | 
 | 2052 |  | 
 | 2053 |     Py_DECREF(data); | 
 | 2054 |  | 
 | 2055 |     Py_XDECREF(res); | 
 | 2056 | } | 
 | 2057 |  | 
 | 2058 | static void | 
 | 2059 | expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) | 
 | 2060 | { | 
 | 2061 |     PyObject* tag; | 
 | 2062 |     PyObject* res = NULL; | 
 | 2063 |  | 
 | 2064 |     if (TreeBuilder_CheckExact(self->target)) | 
 | 2065 |         /* shortcut */ | 
 | 2066 |         /* the standard tree builder doesn't look at the end tag */ | 
 | 2067 |         res = treebuilder_handle_end( | 
 | 2068 |             (TreeBuilderObject*) self->target, Py_None | 
 | 2069 |             ); | 
 | 2070 |     else if (self->handle_end) { | 
 | 2071 |         tag = makeuniversal(self, tag_in); | 
 | 2072 |         if (tag) { | 
 | 2073 |             res = PyObject_CallFunction(self->handle_end, "O", tag); | 
 | 2074 |             Py_DECREF(tag); | 
 | 2075 |         } | 
 | 2076 |     } | 
 | 2077 |  | 
 | 2078 |     Py_XDECREF(res); | 
 | 2079 | } | 
 | 2080 |  | 
 | 2081 | static void | 
 | 2082 | expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix, | 
 | 2083 |                        const XML_Char *uri) | 
 | 2084 | { | 
 | 2085 |     treebuilder_handle_namespace( | 
 | 2086 |         (TreeBuilderObject*) self->target, 1, prefix, uri | 
 | 2087 |         ); | 
 | 2088 | } | 
 | 2089 |  | 
 | 2090 | static void | 
 | 2091 | expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) | 
 | 2092 | { | 
 | 2093 |     treebuilder_handle_namespace( | 
 | 2094 |         (TreeBuilderObject*) self->target, 0, NULL, NULL | 
 | 2095 |         ); | 
 | 2096 | } | 
 | 2097 |  | 
 | 2098 | static void | 
 | 2099 | expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) | 
 | 2100 | { | 
 | 2101 |     PyObject* comment; | 
 | 2102 |     PyObject* res; | 
 | 2103 |  | 
 | 2104 |     if (self->handle_comment) { | 
 | 2105 |         comment = makestring(comment_in, strlen(comment_in)); | 
 | 2106 |         if (comment) { | 
 | 2107 |             res = PyObject_CallFunction(self->handle_comment, "O", comment); | 
 | 2108 |             Py_XDECREF(res); | 
 | 2109 |             Py_DECREF(comment); | 
 | 2110 |         } | 
 | 2111 |     } | 
 | 2112 | } | 
 | 2113 |  | 
 | 2114 | static void | 
 | 2115 | expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, | 
 | 2116 |                  const XML_Char* data_in) | 
 | 2117 | { | 
 | 2118 |     PyObject* target; | 
 | 2119 |     PyObject* data; | 
 | 2120 |     PyObject* res; | 
 | 2121 |  | 
 | 2122 |     if (self->handle_pi) { | 
 | 2123 |         target = makestring(target_in, strlen(target_in)); | 
 | 2124 |         data = makestring(data_in, strlen(data_in)); | 
 | 2125 |         if (target && data) { | 
 | 2126 |             res = PyObject_CallFunction(self->handle_pi, "OO", target, data); | 
 | 2127 |             Py_XDECREF(res); | 
 | 2128 |             Py_DECREF(data); | 
 | 2129 |             Py_DECREF(target); | 
 | 2130 |         } else { | 
 | 2131 |             Py_XDECREF(data); | 
 | 2132 |             Py_XDECREF(target); | 
 | 2133 |         } | 
 | 2134 |     } | 
 | 2135 | } | 
 | 2136 |  | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2137 | static int | 
 | 2138 | expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, | 
 | 2139 |                                XML_Encoding *info) | 
 | 2140 | { | 
 | 2141 |     PyObject* u; | 
 | 2142 |     Py_UNICODE* p; | 
 | 2143 |     unsigned char s[256]; | 
 | 2144 |     int i; | 
 | 2145 |  | 
 | 2146 |     memset(info, 0, sizeof(XML_Encoding)); | 
 | 2147 |  | 
 | 2148 |     for (i = 0; i < 256; i++) | 
 | 2149 |         s[i] = i; | 
 | 2150 |      | 
| Fredrik Lundh | c338999 | 2005-12-25 11:40:19 +0000 | [diff] [blame] | 2151 |     u = PyUnicode_Decode((char*) s, 256, name, "replace"); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2152 |     if (!u) | 
 | 2153 |         return XML_STATUS_ERROR; | 
 | 2154 |  | 
 | 2155 |     if (PyUnicode_GET_SIZE(u) != 256) { | 
 | 2156 |         Py_DECREF(u); | 
 | 2157 |         return XML_STATUS_ERROR; | 
 | 2158 |     } | 
 | 2159 |  | 
 | 2160 |     p = PyUnicode_AS_UNICODE(u); | 
 | 2161 |  | 
 | 2162 |     for (i = 0; i < 256; i++) { | 
 | 2163 | 	if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) | 
 | 2164 | 	    info->map[i] = p[i]; | 
 | 2165 |         else | 
 | 2166 | 	    info->map[i] = -1; | 
 | 2167 |     } | 
 | 2168 |  | 
 | 2169 |     Py_DECREF(u); | 
 | 2170 |  | 
 | 2171 |     return XML_STATUS_OK; | 
 | 2172 | } | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2173 |  | 
 | 2174 | /* -------------------------------------------------------------------- */ | 
 | 2175 | /* constructor and destructor */ | 
 | 2176 |  | 
 | 2177 | static PyObject* | 
| Thomas Wouters | 73e5a5b | 2006-06-08 15:35:45 +0000 | [diff] [blame] | 2178 | xmlparser(PyObject* self_, PyObject* args, PyObject* kw) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2179 | { | 
 | 2180 |     XMLParserObject* self; | 
 | 2181 |     /* FIXME: does this need to be static? */ | 
 | 2182 |     static XML_Memory_Handling_Suite memory_handler; | 
 | 2183 |  | 
 | 2184 |     PyObject* target = NULL; | 
 | 2185 |     char* encoding = NULL; | 
| Martin v. Löwis | 02cbf4a | 2006-02-27 17:20:04 +0000 | [diff] [blame] | 2186 |     static char* kwlist[] = { "target", "encoding", NULL }; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2187 |     if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist, | 
 | 2188 |                                      &target, &encoding)) | 
 | 2189 |         return NULL; | 
 | 2190 |  | 
 | 2191 | #if defined(USE_PYEXPAT_CAPI) | 
 | 2192 |     if (!expat_capi) { | 
 | 2193 |         PyErr_SetString( | 
 | 2194 |             PyExc_RuntimeError, "cannot load dispatch table from pyexpat" | 
 | 2195 |             ); | 
 | 2196 |         return NULL; | 
 | 2197 |     } | 
 | 2198 | #endif | 
 | 2199 |  | 
 | 2200 |     self = PyObject_New(XMLParserObject, &XMLParser_Type); | 
 | 2201 |     if (self == NULL) | 
 | 2202 |         return NULL; | 
 | 2203 |  | 
 | 2204 |     self->entity = PyDict_New(); | 
 | 2205 |     if (!self->entity) { | 
 | 2206 |         PyObject_Del(self); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2207 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2208 |     } | 
 | 2209 |       | 
 | 2210 |     self->names = PyDict_New(); | 
 | 2211 |     if (!self->names) { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2212 |         PyObject_Del(self->entity); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2213 |         PyObject_Del(self); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2214 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2215 |     } | 
 | 2216 |  | 
 | 2217 |     memory_handler.malloc_fcn = PyObject_Malloc; | 
 | 2218 |     memory_handler.realloc_fcn = PyObject_Realloc; | 
 | 2219 |     memory_handler.free_fcn = PyObject_Free; | 
 | 2220 |  | 
 | 2221 |     self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); | 
 | 2222 |     if (!self->parser) { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2223 |         PyObject_Del(self->names); | 
 | 2224 |         PyObject_Del(self->entity); | 
 | 2225 |         PyObject_Del(self); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2226 |         PyErr_NoMemory(); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2227 |         return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2228 |     } | 
 | 2229 |  | 
 | 2230 |     /* setup target handlers */ | 
 | 2231 |     if (!target) { | 
 | 2232 |         target = treebuilder_new(); | 
 | 2233 |         if (!target) { | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2234 |             EXPAT(ParserFree)(self->parser); | 
 | 2235 |             PyObject_Del(self->names); | 
 | 2236 |             PyObject_Del(self->entity); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2237 |             PyObject_Del(self); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2238 |             return NULL; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2239 |         } | 
 | 2240 |     } else | 
 | 2241 |         Py_INCREF(target); | 
 | 2242 |     self->target = target; | 
 | 2243 |  | 
 | 2244 |     self->handle_xml = PyObject_GetAttrString(target, "xml"); | 
 | 2245 |     self->handle_start = PyObject_GetAttrString(target, "start"); | 
 | 2246 |     self->handle_data = PyObject_GetAttrString(target, "data"); | 
 | 2247 |     self->handle_end = PyObject_GetAttrString(target, "end"); | 
 | 2248 |     self->handle_comment = PyObject_GetAttrString(target, "comment"); | 
 | 2249 |     self->handle_pi = PyObject_GetAttrString(target, "pi"); | 
 | 2250 |  | 
 | 2251 |     PyErr_Clear(); | 
 | 2252 |  | 
 | 2253 |     /* configure parser */ | 
 | 2254 |     EXPAT(SetUserData)(self->parser, self); | 
 | 2255 |     EXPAT(SetElementHandler)( | 
 | 2256 |         self->parser, | 
 | 2257 |         (XML_StartElementHandler) expat_start_handler, | 
 | 2258 |         (XML_EndElementHandler) expat_end_handler | 
 | 2259 |         ); | 
 | 2260 |     EXPAT(SetDefaultHandlerExpand)( | 
 | 2261 |         self->parser, | 
 | 2262 |         (XML_DefaultHandler) expat_default_handler | 
 | 2263 |         ); | 
 | 2264 |     EXPAT(SetCharacterDataHandler)( | 
 | 2265 |         self->parser, | 
 | 2266 |         (XML_CharacterDataHandler) expat_data_handler | 
 | 2267 |         ); | 
 | 2268 |     if (self->handle_comment) | 
 | 2269 |         EXPAT(SetCommentHandler)( | 
 | 2270 |             self->parser, | 
 | 2271 |             (XML_CommentHandler) expat_comment_handler | 
 | 2272 |             ); | 
 | 2273 |     if (self->handle_pi) | 
 | 2274 |         EXPAT(SetProcessingInstructionHandler)( | 
 | 2275 |             self->parser, | 
 | 2276 |             (XML_ProcessingInstructionHandler) expat_pi_handler | 
 | 2277 |             ); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2278 |     EXPAT(SetUnknownEncodingHandler)( | 
 | 2279 |         self->parser, | 
 | 2280 |         (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL | 
 | 2281 |         ); | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2282 |  | 
 | 2283 |     ALLOC(sizeof(XMLParserObject), "create expatparser"); | 
 | 2284 |  | 
 | 2285 |     return (PyObject*) self; | 
 | 2286 | } | 
 | 2287 |  | 
 | 2288 | static void | 
 | 2289 | xmlparser_dealloc(XMLParserObject* self) | 
 | 2290 | { | 
 | 2291 |     EXPAT(ParserFree)(self->parser); | 
 | 2292 |  | 
 | 2293 |     Py_XDECREF(self->handle_pi); | 
 | 2294 |     Py_XDECREF(self->handle_comment); | 
 | 2295 |     Py_XDECREF(self->handle_end); | 
 | 2296 |     Py_XDECREF(self->handle_data); | 
 | 2297 |     Py_XDECREF(self->handle_start); | 
 | 2298 |     Py_XDECREF(self->handle_xml); | 
 | 2299 |  | 
 | 2300 |     Py_DECREF(self->target); | 
 | 2301 |     Py_DECREF(self->entity); | 
 | 2302 |     Py_DECREF(self->names); | 
 | 2303 |  | 
 | 2304 |     RELEASE(sizeof(XMLParserObject), "destroy expatparser"); | 
 | 2305 |  | 
 | 2306 |     PyObject_Del(self); | 
 | 2307 | } | 
 | 2308 |  | 
 | 2309 | /* -------------------------------------------------------------------- */ | 
 | 2310 | /* methods (in alphabetical order) */ | 
 | 2311 |  | 
 | 2312 | LOCAL(PyObject*) | 
 | 2313 | expat_parse(XMLParserObject* self, char* data, int data_len, int final) | 
 | 2314 | { | 
 | 2315 |     int ok; | 
 | 2316 |  | 
 | 2317 |     ok = EXPAT(Parse)(self->parser, data, data_len, final); | 
 | 2318 |  | 
 | 2319 |     if (PyErr_Occurred()) | 
 | 2320 |         return NULL; | 
 | 2321 |  | 
 | 2322 |     if (!ok) { | 
 | 2323 |         PyErr_Format( | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 2324 |             PyExc_SyntaxError, "%s: line %ld, column %ld", | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2325 |             EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), | 
 | 2326 |             EXPAT(GetErrorLineNumber)(self->parser), | 
 | 2327 |             EXPAT(GetErrorColumnNumber)(self->parser) | 
 | 2328 |             ); | 
 | 2329 |         return NULL; | 
 | 2330 |     } | 
 | 2331 |  | 
 | 2332 |     Py_RETURN_NONE; | 
 | 2333 | } | 
 | 2334 |  | 
 | 2335 | static PyObject* | 
 | 2336 | xmlparser_close(XMLParserObject* self, PyObject* args) | 
 | 2337 | { | 
 | 2338 |     /* end feeding data to parser */ | 
 | 2339 |  | 
 | 2340 |     PyObject* res; | 
 | 2341 |     if (!PyArg_ParseTuple(args, ":close")) | 
 | 2342 |         return NULL; | 
 | 2343 |  | 
 | 2344 |     res = expat_parse(self, "", 0, 1); | 
 | 2345 |  | 
 | 2346 |     if (res && TreeBuilder_CheckExact(self->target)) { | 
 | 2347 |         Py_DECREF(res); | 
 | 2348 |         return treebuilder_done((TreeBuilderObject*) self->target); | 
 | 2349 |     } | 
 | 2350 |  | 
 | 2351 |     return res; | 
 | 2352 | } | 
 | 2353 |  | 
 | 2354 | static PyObject* | 
 | 2355 | xmlparser_feed(XMLParserObject* self, PyObject* args) | 
 | 2356 | { | 
 | 2357 |     /* feed data to parser */ | 
 | 2358 |  | 
 | 2359 |     char* data; | 
 | 2360 |     int data_len; | 
 | 2361 |     if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) | 
 | 2362 |         return NULL; | 
 | 2363 |  | 
 | 2364 |     return expat_parse(self, data, data_len, 0); | 
 | 2365 | } | 
 | 2366 |  | 
 | 2367 | static PyObject* | 
 | 2368 | xmlparser_parse(XMLParserObject* self, PyObject* args) | 
 | 2369 | { | 
 | 2370 |     /* (internal) parse until end of input stream */ | 
 | 2371 |  | 
 | 2372 |     PyObject* reader; | 
 | 2373 |     PyObject* buffer; | 
 | 2374 |     PyObject* res; | 
 | 2375 |  | 
 | 2376 |     PyObject* fileobj; | 
 | 2377 |     if (!PyArg_ParseTuple(args, "O:_parse", &fileobj)) | 
 | 2378 |         return NULL; | 
 | 2379 |  | 
 | 2380 |     reader = PyObject_GetAttrString(fileobj, "read"); | 
 | 2381 |     if (!reader) | 
 | 2382 |         return NULL; | 
 | 2383 |      | 
 | 2384 |     /* read from open file object */ | 
 | 2385 |     for (;;) { | 
 | 2386 |  | 
 | 2387 |         buffer = PyObject_CallFunction(reader, "i", 64*1024); | 
 | 2388 |  | 
 | 2389 |         if (!buffer) { | 
 | 2390 |             /* read failed (e.g. due to KeyboardInterrupt) */ | 
 | 2391 |             Py_DECREF(reader); | 
 | 2392 |             return NULL; | 
 | 2393 |         } | 
 | 2394 |  | 
 | 2395 |         if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) { | 
 | 2396 |             Py_DECREF(buffer); | 
 | 2397 |             break; | 
 | 2398 |         } | 
 | 2399 |  | 
 | 2400 |         res = expat_parse( | 
 | 2401 |             self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0 | 
 | 2402 |             ); | 
 | 2403 |  | 
 | 2404 |         Py_DECREF(buffer); | 
 | 2405 |  | 
 | 2406 |         if (!res) { | 
 | 2407 |             Py_DECREF(reader); | 
 | 2408 |             return NULL; | 
 | 2409 |         } | 
 | 2410 |         Py_DECREF(res); | 
 | 2411 |  | 
 | 2412 |     } | 
 | 2413 |  | 
 | 2414 |     Py_DECREF(reader); | 
 | 2415 |  | 
 | 2416 |     res = expat_parse(self, "", 0, 1); | 
 | 2417 |  | 
 | 2418 |     if (res && TreeBuilder_CheckExact(self->target)) { | 
 | 2419 |         Py_DECREF(res); | 
 | 2420 |         return treebuilder_done((TreeBuilderObject*) self->target); | 
 | 2421 |     } | 
 | 2422 |  | 
 | 2423 |     return res; | 
 | 2424 | } | 
 | 2425 |  | 
 | 2426 | static PyObject* | 
 | 2427 | xmlparser_setevents(XMLParserObject* self, PyObject* args) | 
 | 2428 | { | 
 | 2429 |     /* activate element event reporting */ | 
 | 2430 |  | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 2431 |     Py_ssize_t i; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2432 |     TreeBuilderObject* target; | 
 | 2433 |  | 
 | 2434 |     PyObject* events; /* event collector */ | 
 | 2435 |     PyObject* event_set = Py_None; | 
 | 2436 |     if (!PyArg_ParseTuple(args, "O!|O:_setevents",  &PyList_Type, &events, | 
 | 2437 |                           &event_set)) | 
 | 2438 |         return NULL; | 
 | 2439 |  | 
 | 2440 |     if (!TreeBuilder_CheckExact(self->target)) { | 
 | 2441 |         PyErr_SetString( | 
 | 2442 |             PyExc_TypeError, | 
 | 2443 |             "event handling only supported for cElementTree.Treebuilder " | 
 | 2444 |             "targets" | 
 | 2445 |             ); | 
 | 2446 |         return NULL; | 
 | 2447 |     } | 
 | 2448 |  | 
 | 2449 |     target = (TreeBuilderObject*) self->target; | 
 | 2450 |  | 
 | 2451 |     Py_INCREF(events); | 
 | 2452 |     Py_XDECREF(target->events); | 
 | 2453 |     target->events = events; | 
 | 2454 |  | 
 | 2455 |     /* clear out existing events */ | 
 | 2456 |     Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL; | 
 | 2457 |     Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL; | 
 | 2458 |     Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL; | 
 | 2459 |     Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL; | 
 | 2460 |  | 
 | 2461 |     if (event_set == Py_None) { | 
 | 2462 |         /* default is "end" only */ | 
 | 2463 |         target->end_event_obj = PyString_FromString("end"); | 
 | 2464 |         Py_RETURN_NONE; | 
 | 2465 |     } | 
 | 2466 |  | 
 | 2467 |     if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */ | 
 | 2468 |         goto error; | 
 | 2469 |  | 
 | 2470 |     for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) { | 
 | 2471 |         PyObject* item = PyTuple_GET_ITEM(event_set, i); | 
 | 2472 |         char* event; | 
 | 2473 |         if (!PyString_Check(item)) | 
 | 2474 |             goto error; | 
 | 2475 |         event = PyString_AS_STRING(item); | 
 | 2476 |         if (strcmp(event, "start") == 0) { | 
 | 2477 |             Py_INCREF(item); | 
 | 2478 |             target->start_event_obj = item; | 
 | 2479 |         } else if (strcmp(event, "end") == 0) { | 
 | 2480 |             Py_INCREF(item); | 
 | 2481 |             Py_XDECREF(target->end_event_obj); | 
 | 2482 |             target->end_event_obj = item; | 
 | 2483 |         } else if (strcmp(event, "start-ns") == 0) { | 
 | 2484 |             Py_INCREF(item); | 
 | 2485 |             Py_XDECREF(target->start_ns_event_obj); | 
 | 2486 |             target->start_ns_event_obj = item; | 
 | 2487 |             EXPAT(SetNamespaceDeclHandler)( | 
 | 2488 |                 self->parser, | 
 | 2489 |                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | 
 | 2490 |                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler | 
 | 2491 |                 ); | 
 | 2492 |         } else if (strcmp(event, "end-ns") == 0) { | 
 | 2493 |             Py_INCREF(item); | 
 | 2494 |             Py_XDECREF(target->end_ns_event_obj); | 
 | 2495 |             target->end_ns_event_obj = item; | 
 | 2496 |             EXPAT(SetNamespaceDeclHandler)( | 
 | 2497 |                 self->parser, | 
 | 2498 |                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, | 
 | 2499 |                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler | 
 | 2500 |                 ); | 
 | 2501 |         } else { | 
 | 2502 |             PyErr_Format( | 
 | 2503 |                 PyExc_ValueError, | 
 | 2504 |                 "unknown event '%s'", event | 
 | 2505 |                 ); | 
 | 2506 |             return NULL; | 
 | 2507 |         } | 
 | 2508 |     } | 
 | 2509 |  | 
 | 2510 |     Py_RETURN_NONE; | 
 | 2511 |  | 
 | 2512 |   error: | 
 | 2513 |     PyErr_SetString( | 
 | 2514 |         PyExc_TypeError, | 
 | 2515 |         "invalid event tuple" | 
 | 2516 |         ); | 
 | 2517 |     return NULL; | 
 | 2518 | } | 
 | 2519 |  | 
 | 2520 | static PyMethodDef xmlparser_methods[] = { | 
 | 2521 |     {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, | 
 | 2522 |     {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, | 
 | 2523 |     {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS}, | 
 | 2524 |     {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, | 
 | 2525 |     {NULL, NULL} | 
 | 2526 | }; | 
 | 2527 |  | 
 | 2528 | static PyObject*   | 
 | 2529 | xmlparser_getattr(XMLParserObject* self, char* name) | 
 | 2530 | { | 
 | 2531 |     PyObject* res; | 
 | 2532 |  | 
 | 2533 |     res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name); | 
 | 2534 |     if (res) | 
 | 2535 | 	return res; | 
 | 2536 |  | 
 | 2537 |     PyErr_Clear(); | 
 | 2538 |  | 
 | 2539 |     if (strcmp(name, "entity") == 0) | 
 | 2540 | 	res = self->entity; | 
 | 2541 |     else if (strcmp(name, "target") == 0) | 
 | 2542 | 	res = self->target; | 
 | 2543 |     else if (strcmp(name, "version") == 0) { | 
 | 2544 |         char buffer[100]; | 
 | 2545 |         sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, | 
 | 2546 |                 XML_MINOR_VERSION, XML_MICRO_VERSION); | 
 | 2547 |         return PyString_FromString(buffer); | 
 | 2548 |     } else { | 
 | 2549 |         PyErr_SetString(PyExc_AttributeError, name); | 
 | 2550 |         return NULL; | 
 | 2551 |     } | 
 | 2552 |  | 
 | 2553 |     Py_INCREF(res); | 
 | 2554 |     return res; | 
 | 2555 | } | 
 | 2556 |  | 
| Neal Norwitz | 227b533 | 2006-03-22 09:28:35 +0000 | [diff] [blame] | 2557 | static PyTypeObject XMLParser_Type = { | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2558 |     PyObject_HEAD_INIT(NULL) | 
 | 2559 |     0, "XMLParser", sizeof(XMLParserObject), 0, | 
 | 2560 |     /* methods */ | 
 | 2561 |     (destructor)xmlparser_dealloc, /* tp_dealloc */ | 
 | 2562 |     0, /* tp_print */ | 
 | 2563 |     (getattrfunc)xmlparser_getattr, /* tp_getattr */ | 
 | 2564 | }; | 
 | 2565 |  | 
 | 2566 | #endif | 
 | 2567 |  | 
 | 2568 | /* ==================================================================== */ | 
 | 2569 | /* python module interface */ | 
 | 2570 |  | 
 | 2571 | static PyMethodDef _functions[] = { | 
 | 2572 |     {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, | 
 | 2573 |     {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, | 
 | 2574 |     {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, | 
 | 2575 | #if defined(USE_EXPAT) | 
 | 2576 |     {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, | 
 | 2577 |     {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, | 
 | 2578 | #endif | 
 | 2579 |     {NULL, NULL} | 
 | 2580 | }; | 
 | 2581 |  | 
| Neal Norwitz | f6657e6 | 2006-12-28 04:47:50 +0000 | [diff] [blame] | 2582 | PyMODINIT_FUNC | 
| Fredrik Lundh | 6d52b55 | 2005-12-16 22:06:43 +0000 | [diff] [blame] | 2583 | init_elementtree(void) | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2584 | { | 
 | 2585 |     PyObject* m; | 
 | 2586 |     PyObject* g; | 
 | 2587 |     char* bootstrap; | 
 | 2588 | #if defined(USE_PYEXPAT_CAPI) | 
 | 2589 |     struct PyExpat_CAPI* capi; | 
 | 2590 | #endif | 
 | 2591 |  | 
 | 2592 |     /* Patch object type */ | 
 | 2593 |     Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type; | 
 | 2594 | #if defined(USE_EXPAT) | 
 | 2595 |     XMLParser_Type.ob_type = &PyType_Type; | 
 | 2596 | #endif | 
 | 2597 |  | 
| Fredrik Lundh | 6d52b55 | 2005-12-16 22:06:43 +0000 | [diff] [blame] | 2598 |     m = Py_InitModule("_elementtree", _functions); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2599 |     if (!m) | 
 | 2600 |         return; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2601 |  | 
 | 2602 |     /* python glue code */ | 
 | 2603 |  | 
 | 2604 |     g = PyDict_New(); | 
| Fredrik Lundh | 44ed4db | 2006-03-12 21:06:35 +0000 | [diff] [blame] | 2605 |     if (!g) | 
 | 2606 |         return; | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2607 |  | 
 | 2608 |     PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); | 
 | 2609 |  | 
 | 2610 |     bootstrap = ( | 
 | 2611 |  | 
 | 2612 | #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000) | 
 | 2613 |         "from __future__ import generators\n" /* enable yield under 2.2 */ | 
 | 2614 | #endif | 
 | 2615 |  | 
 | 2616 |         "from copy import copy, deepcopy\n" | 
 | 2617 |  | 
 | 2618 |         "try:\n" | 
| Fredrik Lundh | 6d52b55 | 2005-12-16 22:06:43 +0000 | [diff] [blame] | 2619 |         "  from xml.etree import ElementTree\n" | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2620 |         "except ImportError:\n" | 
 | 2621 |         "  import ElementTree\n" | 
 | 2622 |         "ET = ElementTree\n" | 
 | 2623 |         "del ElementTree\n" | 
 | 2624 |  | 
| Fredrik Lundh | 6d52b55 | 2005-12-16 22:06:43 +0000 | [diff] [blame] | 2625 |         "import _elementtree as cElementTree\n" | 
| Fredrik Lundh | 8c8836b | 2005-12-16 22:06:06 +0000 | [diff] [blame] | 2626 |  | 
 | 2627 |         "try:\n" /* check if copy works as is */ | 
 | 2628 |         "  copy(cElementTree.Element('x'))\n" | 
 | 2629 |         "except:\n" | 
 | 2630 |         "  def copyelement(elem):\n" | 
 | 2631 |         "    return elem\n" | 
 | 2632 |  | 
 | 2633 |         "def Comment(text=None):\n" /* public */ | 
 | 2634 |         "  element = cElementTree.Element(ET.Comment)\n" | 
 | 2635 |         "  element.text = text\n" | 
 | 2636 |         "  return element\n" | 
 | 2637 |         "cElementTree.Comment = Comment\n" | 
 | 2638 |  | 
 | 2639 |         "class ElementTree(ET.ElementTree):\n" /* public */ | 
 | 2640 |         "  def parse(self, source, parser=None):\n" | 
 | 2641 |         "    if not hasattr(source, 'read'):\n" | 
 | 2642 |         "      source = open(source, 'rb')\n" | 
 | 2643 |         "    if parser is not None:\n" | 
 | 2644 |         "      while 1:\n" | 
 | 2645 |         "        data = source.read(65536)\n" | 
 | 2646 |         "        if not data:\n" | 
 | 2647 |         "          break\n" | 
 | 2648 |         "        parser.feed(data)\n" | 
 | 2649 |         "      self._root = parser.close()\n" | 
 | 2650 |         "    else:\n"  | 
 | 2651 |         "      parser = cElementTree.XMLParser()\n" | 
 | 2652 |         "      self._root = parser._parse(source)\n" | 
 | 2653 |         "    return self._root\n" | 
 | 2654 |         "cElementTree.ElementTree = ElementTree\n" | 
 | 2655 |  | 
 | 2656 |         "def getiterator(node, tag=None):\n" /* helper */ | 
 | 2657 |         "  if tag == '*':\n" | 
 | 2658 |         "    tag = None\n" | 
 | 2659 | #if (PY_VERSION_HEX < 0x02020000) | 
 | 2660 |         "  nodes = []\n" /* 2.1 doesn't have yield */ | 
 | 2661 |         "  if tag is None or node.tag == tag:\n" | 
 | 2662 |         "    nodes.append(node)\n" | 
 | 2663 |         "  for node in node:\n" | 
 | 2664 |         "    nodes.extend(getiterator(node, tag))\n" | 
 | 2665 |         "  return nodes\n" | 
 | 2666 | #else | 
 | 2667 |         "  if tag is None or node.tag == tag:\n" | 
 | 2668 |         "    yield node\n" | 
 | 2669 |         "  for node in node:\n" | 
 | 2670 |         "    for node in getiterator(node, tag):\n" | 
 | 2671 |         "      yield node\n" | 
 | 2672 | #endif | 
 | 2673 |  | 
 | 2674 |         "def parse(source, parser=None):\n" /* public */ | 
 | 2675 |         "  tree = ElementTree()\n" | 
 | 2676 |         "  tree.parse(source, parser)\n" | 
 | 2677 |         "  return tree\n" | 
 | 2678 |         "cElementTree.parse = parse\n" | 
 | 2679 |  | 
 | 2680 | #if (PY_VERSION_HEX < 0x02020000) | 
 | 2681 |         "if hasattr(ET, 'iterparse'):\n" | 
 | 2682 |         "    cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */ | 
 | 2683 | #else | 
 | 2684 |         "class iterparse(object):\n" | 
 | 2685 |         " root = None\n" | 
 | 2686 |         " def __init__(self, file, events=None):\n" | 
 | 2687 |         "  if not hasattr(file, 'read'):\n" | 
 | 2688 |         "    file = open(file, 'rb')\n" | 
 | 2689 |         "  self._file = file\n" | 
 | 2690 |         "  self._events = events\n" | 
 | 2691 |         " def __iter__(self):\n"  | 
 | 2692 |         "  events = []\n" | 
 | 2693 |         "  b = cElementTree.TreeBuilder()\n" | 
 | 2694 |         "  p = cElementTree.XMLParser(b)\n" | 
 | 2695 |         "  p._setevents(events, self._events)\n" | 
 | 2696 |         "  while 1:\n" | 
 | 2697 |         "    data = self._file.read(16384)\n" | 
 | 2698 |         "    if not data:\n" | 
 | 2699 |         "      break\n" | 
 | 2700 |         "    p.feed(data)\n" | 
 | 2701 |         "    for event in events:\n" | 
 | 2702 |         "      yield event\n" | 
 | 2703 |         "    del events[:]\n" | 
 | 2704 |         "  root = p.close()\n" | 
 | 2705 |         "  for event in events:\n" | 
 | 2706 |         "    yield event\n" | 
 | 2707 |         "  self.root = root\n" | 
 | 2708 |         "cElementTree.iterparse = iterparse\n" | 
 | 2709 | #endif | 
 | 2710 |  | 
 | 2711 |         "def PI(target, text=None):\n" /* public */ | 
 | 2712 |         "  element = cElementTree.Element(ET.ProcessingInstruction)\n" | 
 | 2713 |         "  element.text = target\n" | 
 | 2714 |         "  if text:\n" | 
 | 2715 |         "    element.text = element.text + ' ' + text\n" | 
 | 2716 |         "  return element\n" | 
 | 2717 |  | 
 | 2718 |         "  elem = cElementTree.Element(ET.PI)\n" | 
 | 2719 |         "  elem.text = text\n" | 
 | 2720 |         "  return elem\n" | 
 | 2721 |         "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n" | 
 | 2722 |  | 
 | 2723 |         "def XML(text):\n" /* public */ | 
 | 2724 |         "  parser = cElementTree.XMLParser()\n" | 
 | 2725 |         "  parser.feed(text)\n" | 
 | 2726 |         "  return parser.close()\n" | 
 | 2727 |         "cElementTree.XML = cElementTree.fromstring = XML\n" | 
 | 2728 |  | 
 | 2729 |         "def XMLID(text):\n" /* public */ | 
 | 2730 |         "  tree = XML(text)\n" | 
 | 2731 |         "  ids = {}\n" | 
 | 2732 |         "  for elem in tree.getiterator():\n" | 
 | 2733 |         "    id = elem.get('id')\n" | 
 | 2734 |         "    if id:\n" | 
 | 2735 |         "      ids[id] = elem\n" | 
 | 2736 |         "  return tree, ids\n" | 
 | 2737 |         "cElementTree.XMLID = XMLID\n" | 
 | 2738 |  | 
 | 2739 |         "cElementTree.dump = ET.dump\n" | 
 | 2740 |         "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" | 
 | 2741 |         "cElementTree.iselement = ET.iselement\n" | 
 | 2742 |         "cElementTree.QName = ET.QName\n" | 
 | 2743 |         "cElementTree.tostring = ET.tostring\n" | 
 | 2744 |         "cElementTree.VERSION = '" VERSION "'\n" | 
 | 2745 |         "cElementTree.__version__ = '" VERSION "'\n" | 
 | 2746 |         "cElementTree.XMLParserError = SyntaxError\n" | 
 | 2747 |  | 
 | 2748 |        ); | 
 | 2749 |  | 
 | 2750 |     PyRun_String(bootstrap, Py_file_input, g, NULL); | 
 | 2751 |  | 
 | 2752 |     elementpath_obj = PyDict_GetItemString(g, "ElementPath"); | 
 | 2753 |  | 
 | 2754 |     elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); | 
 | 2755 |     if (elementtree_copyelement_obj) { | 
 | 2756 |         /* reduce hack needed; enable reduce method */ | 
 | 2757 |         PyMethodDef* mp; | 
 | 2758 |         for (mp = element_methods; mp->ml_name; mp++) | 
 | 2759 |             if (mp->ml_meth == (PyCFunction) element_reduce) { | 
 | 2760 |                 mp->ml_name = "__reduce__"; | 
 | 2761 |                 break; | 
 | 2762 |             } | 
 | 2763 |     } else | 
 | 2764 |         PyErr_Clear(); | 
 | 2765 |     elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); | 
 | 2766 |     elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator"); | 
 | 2767 |  | 
 | 2768 | #if defined(USE_PYEXPAT_CAPI) | 
 | 2769 |     /* link against pyexpat, if possible */ | 
 | 2770 |     capi = PyCObject_Import("pyexpat", "expat_CAPI"); | 
 | 2771 |     if (capi && | 
 | 2772 |         strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 && | 
 | 2773 |         capi->size <= sizeof(*expat_capi) && | 
 | 2774 |         capi->MAJOR_VERSION == XML_MAJOR_VERSION && | 
 | 2775 |         capi->MINOR_VERSION == XML_MINOR_VERSION && | 
 | 2776 |         capi->MICRO_VERSION == XML_MICRO_VERSION) | 
 | 2777 |         expat_capi = capi; | 
 | 2778 |     else | 
 | 2779 |         expat_capi = NULL; | 
 | 2780 | #endif | 
 | 2781 |  | 
 | 2782 | } |