blob: 645e999a0be6ca3c0bc4d59aa96411110a26b189 [file] [log] [blame]
Eli Bendersky84fae782013-03-09 07:12:48 -08001"""Lightweight XML support for Python.
2
3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree. This module has two classes for this purpose:
5
6 1. ElementTree represents the whole XML document as a tree and
7
8 2. Element represents a single node in this tree.
9
10 Interactions with the whole document (reading and writing to/from files) are
11 usually done on the ElementTree level. Interactions with a single XML element
12 and its sub-elements are done on the Element level.
13
14 Element is a flexible container object designed to store hierarchical data
15 structures in memory. It can be described as a cross between a list and a
16 dictionary. Each Element has a number of properties associated with it:
17
18 'tag' - a string containing the element's name.
19
20 'attributes' - a Python dictionary storing the element's attributes.
21
22 'text' - a string containing the element's text content.
23
24 'tail' - an optional string containing text after the element's end tag.
25
26 And a number of child elements stored in a Python sequence.
27
28 To create an element instance, use the Element constructor,
29 or the SubElement factory function.
30
31 You can also use the ElementTree class to wrap an element structure
32 and convert it to and from XML.
33
34"""
35
Eli Benderskybf05df22013-04-20 05:44:01 -070036#---------------------------------------------------------------------
37# Licensed to PSF under a Contributor Agreement.
38# See http://www.python.org/psf/license for licensing details.
Armin Rigo9ed73062005-12-14 18:10:45 +000039#
40# ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +000041# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
Armin Rigo9ed73062005-12-14 18:10:45 +000042#
43# fredrik@pythonware.com
44# http://www.pythonware.com
Armin Rigo9ed73062005-12-14 18:10:45 +000045# --------------------------------------------------------------------
46# The ElementTree toolkit is
47#
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048# Copyright (c) 1999-2008 by Fredrik Lundh
Armin Rigo9ed73062005-12-14 18:10:45 +000049#
50# By obtaining, using, and/or copying this software and/or its
51# associated documentation, you agree that you have read, understood,
52# and will comply with the following terms and conditions:
53#
54# Permission to use, copy, modify, and distribute this software and
55# its associated documentation for any purpose and without fee is
56# hereby granted, provided that the above copyright notice appears in
57# all copies, and that both that copyright notice and this permission
58# notice appear in supporting documentation, and that the name of
59# Secret Labs AB or the author not be used in advertising or publicity
60# pertaining to distribution of the software without specific, written
61# prior permission.
62#
63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70# OF THIS SOFTWARE.
71# --------------------------------------------------------------------
72
73__all__ = [
74 # public symbols
75 "Comment",
76 "dump",
77 "Element", "ElementTree",
Florent Xiclunaf15351d2010-03-13 23:24:31 +000078 "fromstring", "fromstringlist",
Armin Rigo9ed73062005-12-14 18:10:45 +000079 "iselement", "iterparse",
Florent Xiclunaf15351d2010-03-13 23:24:31 +000080 "parse", "ParseError",
Armin Rigo9ed73062005-12-14 18:10:45 +000081 "PI", "ProcessingInstruction",
82 "QName",
83 "SubElement",
Florent Xiclunaf15351d2010-03-13 23:24:31 +000084 "tostring", "tostringlist",
Armin Rigo9ed73062005-12-14 18:10:45 +000085 "TreeBuilder",
Florent Xiclunaf15351d2010-03-13 23:24:31 +000086 "VERSION",
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010087 "XML", "XMLID",
Martin Panterdcfebb32016-04-01 06:55:55 +000088 "XMLParser", "XMLPullParser",
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010089 "register_namespace",
Stefan Behnele1d5dd62019-05-01 22:34:13 +020090 "canonicalize", "C14NWriterTarget",
Armin Rigo9ed73062005-12-14 18:10:45 +000091 ]
92
Florent Xiclunaf15351d2010-03-13 23:24:31 +000093VERSION = "1.3.0"
94
Florent Xiclunaf15351d2010-03-13 23:24:31 +000095import sys
96import re
97import warnings
Eli Bendersky00f402b2012-07-15 06:02:22 +030098import io
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +020099import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +0300100import collections.abc
Eli Bendersky00f402b2012-07-15 06:02:22 +0300101import contextlib
Armin Rigo9ed73062005-12-14 18:10:45 +0000102
Eli Bendersky27cbb192012-06-15 09:03:19 +0300103from . import ElementPath
Armin Rigo9ed73062005-12-14 18:10:45 +0000104
Armin Rigo9ed73062005-12-14 18:10:45 +0000105
class ParseError(SyntaxError):
    """An error when parsing an XML document.

    In addition to its exception value, a ParseError contains
    two extra attributes:
        'code'     - the specific exception code
        'position' - the line and column of the error

    This class body does not define either attribute itself; they only
    exist on an instance once the raising code has assigned them.

    """
116
117# --------------------------------------------------------------------
118
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000119
def iselement(element):
    """Return True if *element* appears to be an Element.

    The check is duck-typed: any object that exposes a ``tag`` attribute
    qualifies, whatever its class.

    """
    return hasattr(element, 'tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000123
Armin Rigo9ed73062005-12-14 18:10:45 +0000124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement.  This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag.  This is either a string or the value None.  Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        # Always build a fresh dict, so neither the caller's mapping nor the
        # shared default argument is ever aliased or mutated.
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        if isinstance(index, slice):
            # Materialize first: validating a one-shot iterator in place
            # would exhaust it and leave nothing to assign afterwards.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is an iterable with zero or more elements; one-shot
        iterators such as generators are accepted.

        Every item is validated before any is added, so a TypeError raised
        for a non-Element item leaves this element unchanged.

        """
        # Snapshot the iterable: it is walked once for validation and once
        # for the actual extend, which a generator could not survive.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.  The returned list is the
        element's own internal child list, not a copy.

        """
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is whatever the attribute
        dictionary returns (a keys view, not a list), in that dictionary's
        own order.

        """
        return self.attrib.keys()

    def items(self):
        """Get the element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is whatever the attribute
        dictionary returns (an items view, not a list), in that dictionary's
        own order.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        the string "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Deprecated) Return a list of the elements produced by iter(tag)."""
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            DeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.  Empty or None text and tail values
        are skipped.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None contribute no
        # text at all, including that of their subelements.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
Armin Rigo9ed73062005-12-14 18:10:45 +0000439
Armin Rigo9ed73062005-12-14 18:10:45 +0000440
def SubElement(parent, tag, attrib={}, **extra):
    """Subelement factory which creates an element instance, and appends it
    to an existing parent.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelements name, *attrib* is
    an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    The new element is built with parent.makeelement(), added with
    parent.append(), and returned.

    """
    # Merge into a new dict so the caller's mapping (and the shared default)
    # is never modified; keyword arguments win on duplicate names.
    attrib = {**attrib, **extra}
    element = parent.makeelement(tag, attrib)
    parent.append(element)
    return element
457
Armin Rigo9ed73062005-12-14 18:10:45 +0000458
def Comment(text=None):
    """Comment element factory.

    This function creates a special element which the standard serializer
    serializes as an XML comment.

    *text* is a string containing the comment string.

    The returned Element uses this factory function itself as its tag,
    and carries *text* in its text attribute.

    """
    element = Element(Comment)
    element.text = text
    return element
471
Armin Rigo9ed73062005-12-14 18:10:45 +0000472
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    This function creates a special element which the standard serializer
    serializes as an XML processing instruction.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.

    The returned Element uses this factory function itself as its tag.  Its
    text attribute is *target*, followed by a space and *text* when *text*
    is non-empty.

    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        element.text = element.text + " " + text
    return element


PI = ProcessingInstruction
490
Armin Rigo9ed73062005-12-14 18:10:45 +0000491
class QName:
    """Qualified name wrapper.

    This class can be used to wrap a QName attribute value in order to get
    proper namespace handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparisons and hashing are delegated to the wrapped text, so a QName
    compares equal to (and hashes like) the plain string it holds.

    """

    def __init__(self, text_or_uri, tag=None):
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    # Each comparison unwraps another QName to its text; any other operand
    # is compared against the text directly.

    def __le__(self, other):
        if isinstance(other, QName):
            return self.text <= other.text
        return self.text <= other

    def __lt__(self, other):
        if isinstance(other, QName):
            return self.text < other.text
        return self.text < other

    def __ge__(self, other):
        if isinstance(other, QName):
            return self.text >= other.text
        return self.text >= other

    def __gt__(self, other):
        if isinstance(other, QName):
            return self.text > other.text
        return self.text > other

    def __eq__(self, other):
        if isinstance(other, QName):
            return self.text == other.text
        return self.text == other
Armin Rigo9ed73062005-12-14 18:10:45 +0000536
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000537# --------------------------------------------------------------------
538
Armin Rigo9ed73062005-12-14 18:10:45 +0000539
540class ElementTree:
Eli Bendersky84fae782013-03-09 07:12:48 -0800541 """An XML element hierarchy.
Armin Rigo9ed73062005-12-14 18:10:45 +0000542
Eli Bendersky84fae782013-03-09 07:12:48 -0800543 This class also provides support for serialization to and from
544 standard XML.
545
546 *element* is an optional root element node,
547 *file* is an optional file handle or file name of an XML file whose
548 contents will be used to initialize the tree with.
549
550 """
Armin Rigo9ed73062005-12-14 18:10:45 +0000551 def __init__(self, element=None, file=None):
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000552 # assert element is None or iselement(element)
Armin Rigo9ed73062005-12-14 18:10:45 +0000553 self._root = element # first node
554 if file:
555 self.parse(file)
556
Armin Rigo9ed73062005-12-14 18:10:45 +0000557 def getroot(self):
Eli Bendersky84fae782013-03-09 07:12:48 -0800558 """Return root element of this tree."""
Armin Rigo9ed73062005-12-14 18:10:45 +0000559 return self._root
560
Armin Rigo9ed73062005-12-14 18:10:45 +0000561 def _setroot(self, element):
Eli Bendersky84fae782013-03-09 07:12:48 -0800562 """Replace root element of this tree.
563
564 This will discard the current contents of the tree and replace it
565 with the given element. Use with care!
566
567 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000568 # assert iselement(element)
Armin Rigo9ed73062005-12-14 18:10:45 +0000569 self._root = element
570
Armin Rigo9ed73062005-12-14 18:10:45 +0000571 def parse(self, source, parser=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800572 """Load external XML document into element tree.
573
574 *source* is a file name or file object, *parser* is an optional parser
575 instance that defaults to XMLParser.
576
577 ParseError is raised if the parser fails to parse the document.
578
579 Returns the root element of the given source document.
580
581 """
Antoine Pitroue033e062010-10-29 10:38:18 +0000582 close_source = False
Armin Rigo9ed73062005-12-14 18:10:45 +0000583 if not hasattr(source, "read"):
584 source = open(source, "rb")
Antoine Pitroue033e062010-10-29 10:38:18 +0000585 close_source = True
586 try:
Eli Benderskya3699232013-05-19 18:47:23 -0700587 if parser is None:
588 # If no parser was specified, create a default XMLParser
589 parser = XMLParser()
590 if hasattr(parser, '_parse_whole'):
591 # The default XMLParser, when it comes from an accelerator,
592 # can define an internal _parse_whole API for efficiency.
593 # It can be used to parse the whole source without feeding
594 # it with chunks.
595 self._root = parser._parse_whole(source)
596 return self._root
597 while True:
Antoine Pitroue033e062010-10-29 10:38:18 +0000598 data = source.read(65536)
599 if not data:
600 break
601 parser.feed(data)
602 self._root = parser.close()
603 return self._root
604 finally:
605 if close_source:
606 source.close()
Armin Rigo9ed73062005-12-14 18:10:45 +0000607
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000608 def iter(self, tag=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800609 """Create and return tree iterator for the root element.
610
611 The iterator loops over all elements in this tree, in document order.
612
613 *tag* is a string with the tag name to iterate over
614 (default is to return all elements).
615
616 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000617 # assert self._root is not None
618 return self._root.iter(tag)
619
620 # compatibility
Armin Rigo9ed73062005-12-14 18:10:45 +0000621 def getiterator(self, tag=None):
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000622 warnings.warn(
623 "This method will be removed in future versions. "
624 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +0300625 DeprecationWarning, stacklevel=2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000626 )
627 return list(self.iter(tag))
Armin Rigo9ed73062005-12-14 18:10:45 +0000628
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000629 def find(self, path, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800630 """Find first matching element by tag name or path.
631
632 Same as getroot().find(path), which is Element.find()
633
634 *path* is a string having either an element tag or an XPath,
635 *namespaces* is an optional mapping from namespace prefix to full name.
636
637 Return the first matching element, or None if no element was found.
638
639 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000640 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000641 if path[:1] == "/":
642 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000643 warnings.warn(
644 "This search is broken in 1.3 and earlier, and will be "
645 "fixed in a future version. If you rely on the current "
646 "behaviour, change it to %r" % path,
647 FutureWarning, stacklevel=2
648 )
649 return self._root.find(path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000650
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000651 def findtext(self, path, default=None, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800652 """Find first matching element by tag name or path.
653
654 Same as getroot().findtext(path), which is Element.findtext()
655
656 *path* is a string having either an element tag or an XPath,
657 *namespaces* is an optional mapping from namespace prefix to full name.
658
659 Return the first matching element, or None if no element was found.
660
661 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000662 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000663 if path[:1] == "/":
664 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000665 warnings.warn(
666 "This search is broken in 1.3 and earlier, and will be "
667 "fixed in a future version. If you rely on the current "
668 "behaviour, change it to %r" % path,
669 FutureWarning, stacklevel=2
670 )
671 return self._root.findtext(path, default, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000672
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000673 def findall(self, path, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800674 """Find all matching subelements by tag name or path.
675
676 Same as getroot().findall(path), which is Element.findall().
677
678 *path* is a string having either an element tag or an XPath,
679 *namespaces* is an optional mapping from namespace prefix to full name.
680
681 Return list containing all matching elements in document order.
682
683 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000684 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000685 if path[:1] == "/":
686 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000687 warnings.warn(
688 "This search is broken in 1.3 and earlier, and will be "
689 "fixed in a future version. If you rely on the current "
690 "behaviour, change it to %r" % path,
691 FutureWarning, stacklevel=2
692 )
693 return self._root.findall(path, namespaces)
694
def iterfind(self, path, namespaces=None):
    """Iterate over all matching subelements by tag name or path.

    Thin wrapper around the root element's iterfind().

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    A *path* beginning with "/" is rewritten to begin with "./" and a
    FutureWarning naming the rewritten path is issued.

    Return an iterable yielding all matching elements in document order.

    """
    # assert self._root is not None
    if path[:1] != "/":
        # Common case: nothing to rewrite, delegate directly.
        return self._root.iterfind(path, namespaces)
    relative_path = "." + path
    warnings.warn(
        "This search is broken in 1.3 and earlier, and will be "
        "fixed in a future version. If you rely on the current "
        "behaviour, change it to %r" % relative_path,
        FutureWarning, stacklevel=2
    )
    return self._root.iterfind(relative_path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000716
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None, *,
          short_empty_elements=True):
    """Write element tree to a file as XML.

    Arguments:
      *file_or_filename* -- file name or a file object opened for writing

      *encoding* -- the output encoding; when omitted, "utf-8" is used
                    for the "c14n" method and "us-ascii" for every
                    other method

      *xml_declaration* -- bool indicating if an XML declaration should be
                           added to the output (only honoured by the
                           "xml" method). If None, an XML declaration
                           is added if encoding IS NOT either of:
                           US-ASCII, UTF-8, or Unicode

      *default_namespace* -- sets the default XML namespace (for "xmlns")

      *method* -- either "xml" (default), "html", "text", or "c14n"

      *short_empty_elements* -- controls the formatting of elements
                                that contain no content. If True (default)
                                they are emitted as a single self-closed
                                tag, otherwise they are emitted as a pair
                                of start/end tags

    Raises ValueError if *method* is not a registered output method.

    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)
    if not encoding:
        encoding = "utf-8" if method == "c14n" else "us-ascii"
    enc_lower = encoding.lower()
    # An explicit truthy flag always wins; None means "decide from the
    # encoding"; an explicit falsy flag suppresses the declaration.
    wants_declaration = method == "xml" and (
        xml_declaration or
        (xml_declaration is None and
         enc_lower not in ("utf-8", "us-ascii", "unicode")))
    with _get_writer(file_or_filename, enc_lower) as emit:
        if wants_declaration:
            if enc_lower == "unicode":
                # Retrieve the default encoding for the xml declaration
                import locale
                declared_encoding = locale.getpreferredencoding()
            else:
                declared_encoding = encoding
            emit("<?xml version='1.0' encoding='%s'?>\n" % (
                declared_encoding,))
        if method == "text":
            _serialize_text(emit, self._root)
        else:
            qnames, namespaces = _namespaces(self._root, default_namespace)
            serializer = _serialize[method]
            serializer(emit, self._root, qnames, namespaces,
                       short_empty_elements=short_empty_elements)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000774
def write_c14n(self, file):
    """Write the tree to *file* using the "c14n" output method.

    Provided for lxml.etree compatibility; prefer calling write() with
    method="c14n".

    """
    outcome = self.write(file, method="c14n")
    return outcome
Armin Rigo9ed73062005-12-14 18:10:45 +0000778
779# --------------------------------------------------------------------
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000780# serialization support
781
Eli Bendersky00f402b2012-07-15 06:02:22 +0300782@contextlib.contextmanager
783def _get_writer(file_or_filename, encoding):
Ezio Melottib5bc3532013-08-17 16:11:40 +0300784 # returns text write method and release all resources after using
Eli Bendersky00f402b2012-07-15 06:02:22 +0300785 try:
786 write = file_or_filename.write
787 except AttributeError:
788 # file_or_filename is a file name
789 if encoding == "unicode":
790 file = open(file_or_filename, "w")
791 else:
792 file = open(file_or_filename, "w", encoding=encoding,
793 errors="xmlcharrefreplace")
794 with file:
795 yield file.write
796 else:
797 # file_or_filename is a file-like object
798 # encoding determines if it is a text or binary writer
799 if encoding == "unicode":
800 # use a text writer as is
801 yield write
802 else:
803 # wrap a binary writer with TextIOWrapper
804 with contextlib.ExitStack() as stack:
805 if isinstance(file_or_filename, io.BufferedIOBase):
806 file = file_or_filename
807 elif isinstance(file_or_filename, io.RawIOBase):
808 file = io.BufferedWriter(file_or_filename)
809 # Keep the original file open when the BufferedWriter is
810 # destroyed
811 stack.callback(file.detach)
812 else:
813 # This is to handle passed objects that aren't in the
814 # IOBase hierarchy, but just have a write method
815 file = io.BufferedIOBase()
816 file.writable = lambda: True
817 file.write = write
818 try:
819 # TextIOWrapper uses this methods to determine
820 # if BOM (for UTF-16, etc) should be added
821 file.seekable = file_or_filename.seekable
822 file.tell = file_or_filename.tell
823 except AttributeError:
824 pass
825 file = io.TextIOWrapper(file,
826 encoding=encoding,
827 errors="xmlcharrefreplace",
828 newline="\n")
829 # Keep the original file open when the TextIOWrapper is
830 # destroyed
831 stack.callback(file.detach)
832 yield file.write
833
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used below *elem*.

    Every element yielded by ``elem.iter()`` is inspected: its tag, its
    attribute keys, and any QName used as an attribute value or as text.

    Returns a ``(qnames, namespaces)`` pair. *qnames* maps each name to
    its serialized ``prefix:local`` form (or the bare local name);
    *namespaces* maps namespace URIs to prefixes. When
    *default_namespace* is given it is registered with an empty prefix,
    and unqualified names are rejected with ValueError.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            # An empty prefix marks the default namespace: no colon.
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, (QName, str)):
            tag_name = tag.text if isinstance(tag, QName) else tag
            if tag_name not in qnames:
                add_qname(tag_name)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
894
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Recursively write *elem* and its subtree as XML through *write*.

    *qnames* maps tags and attribute names to their serialized form,
    *namespaces* maps URIs to prefixes and is emitted as ``xmlns``
    attributes on this element only (children are passed None).
    *short_empty_elements* selects ``<tag />`` over ``<tag></tag>`` for
    elements without text or children. Extra keyword arguments are
    accepted and ignored.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % text)
    elif tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[tag]
        if name is None:
            # No tag of its own: only the content is written.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                for uri, prefix in sorted(namespaces.items(),
                                          key=lambda x: x[1]):
                    # sorted on prefix
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                    ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
            else:
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail))
944
# Lower-cased names of HTML elements that _serialize_html writes without
# an end tag. Built directly as a set: the former tuple plus
# "try: set(...) except NameError" fallback could never take the except
# branch on an interpreter able to run this module.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}
952
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Recursively write *elem* and its subtree as HTML through *write*.

    *qnames* maps tags and attribute names to their serialized form,
    *namespaces* maps URIs to prefixes and is emitted as ``xmlns``
    attributes on this element only (children are passed None).
    The text of ``script`` and ``style`` elements is written unescaped,
    and elements listed in HTML_EMPTY get no end tag. Extra keyword
    arguments are accepted and ignored.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[tag]
        if name is None:
            # No tag of its own: only the content is written.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                for uri, prefix in sorted(namespaces.items(),
                                          key=lambda x: x[1]):
                    # sorted on prefix
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                    ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
1002
1003def _serialize_text(write, elem):
1004 for part in elem.itertext():
1005 write(part)
1006 if elem.tail:
1007 write(elem.tail)
1008
# Dispatch table from output method name to serializer function;
# ElementTree.write() rejects any method that is not a key here.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
    # The optional "c14n" entry (_serialize_c14n) is not listed here;
    # that method is imported at the end of the module.
}
Armin Rigo9ed73062005-12-14 18:10:45 +00001016
Armin Rigo9ed73062005-12-14 18:10:45 +00001017
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting keys first so the registry is not modified
    # while it is being scanned.
    stale_uris = [known_uri
                  for known_uri, known_prefix in _namespace_map.items()
                  if known_uri == uri or known_prefix == prefix]
    for known_uri in stale_uris:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1036
# Global registry mapping namespace URIs to their preferred prefixes,
# pre-populated with "well-known" namespaces. register_namespace() edits
# it in place.
_namespace_map = {
    # core XML, XHTML, RDF and WSDL
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# Expose the registry on the function object, for tests and
# troubleshooting.
register_namespace._namespace_map = _namespace_map
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001051
1052def _raise_serialization_error(text):
1053 raise TypeError(
1054 "cannot serialize %r (type %s)" % (text, type(text).__name__)
1055 )
1056
1057def _escape_cdata(text):
1058 # escape character data
1059 try:
1060 # it's worth avoiding do-nothing calls for strings that are
Mike53f7a7c2017-12-14 14:04:53 +03001061 # shorter than 500 characters, or so. assume that's, by far,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001062 # the most common case in most applications.
1063 if "&" in text:
1064 text = text.replace("&", "&amp;")
1065 if "<" in text:
1066 text = text.replace("<", "&lt;")
1067 if ">" in text:
1068 text = text.replace(">", "&gt;")
1069 return text
1070 except (TypeError, AttributeError):
1071 _raise_serialization_error(text)
1072
1073def _escape_attrib(text):
1074 # escape attribute value
1075 try:
1076 if "&" in text:
1077 text = text.replace("&", "&amp;")
1078 if "<" in text:
1079 text = text.replace("<", "&lt;")
1080 if ">" in text:
1081 text = text.replace(">", "&gt;")
1082 if "\"" in text:
1083 text = text.replace("\"", "&quot;")
Raymond Hettinger076366c2016-09-11 23:18:03 -07001084 # The following business with carriage returns is to satisfy
Raymond Hettinger11fa3ff2016-09-11 23:23:24 -07001085 # Section 2.11 of the XML specification, stating that
Raymond Hettinger076366c2016-09-11 23:18:03 -07001086 # CR or CR LN should be replaced with just LN
1087 # http://www.w3.org/TR/REC-xml/#sec-line-ends
1088 if "\r\n" in text:
1089 text = text.replace("\r\n", "\n")
1090 if "\r" in text:
1091 text = text.replace("\r", "\n")
1092 #The following four lines are issue 17582
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001093 if "\n" in text:
1094 text = text.replace("\n", "&#10;")
Raymond Hettinger076366c2016-09-11 23:18:03 -07001095 if "\t" in text:
1096 text = text.replace("\t", "&#09;")
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001097 return text
1098 except (TypeError, AttributeError):
1099 _raise_serialization_error(text)
1100
1101def _escape_attrib_html(text):
1102 # escape attribute value
1103 try:
1104 if "&" in text:
1105 text = text.replace("&", "&amp;")
1106 if ">" in text:
1107 text = text.replace(">", "&gt;")
1108 if "\"" in text:
1109 text = text.replace("\"", "&quot;")
1110 return text
1111 except (TypeError, AttributeError):
1112 _raise_serialization_error(text)
1113
1114# --------------------------------------------------------------------
1115
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included. If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
    sets the default XML namespace (for "xmlns"). *xml_declaration* and
    *short_empty_elements* are handed unchanged to ElementTree.write().

    Returns an (optionally) encoded string containing the XML data.

    """
    # The buffer type must match what write() produces: text for the
    # pseudo-encoding "unicode", bytes for everything else.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001140class _ListDataStream(io.BufferedIOBase):
Eli Bendersky84fae782013-03-09 07:12:48 -08001141 """An auxiliary stream accumulating into a list reference."""
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001142 def __init__(self, lst):
1143 self.lst = lst
Eli Benderskyf90fc682012-07-17 15:09:56 +03001144
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001145 def writable(self):
1146 return True
1147
1148 def seekable(self):
1149 return True
1150
1151 def write(self, b):
1152 self.lst.append(b)
1153
1154 def tell(self):
1155 return len(self.lst)
1156
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Generate the serialization of *element* as a list of chunks.

    Takes the same arguments as tostring() and passes them to
    ElementTree.write(); the output is collected through a
    _ListDataStream, so each write made by the serializer becomes one
    list item.

    Returns the list of collected chunks.

    """
    chunks = []
    sink = _ListDataStream(chunks)
    tree = ElementTree(element)
    tree.write(sink, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return chunks
Armin Rigo9ed73062005-12-14 18:10:45 +00001168
Armin Rigo9ed73062005-12-14 18:10:45 +00001169
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element. The exact output
    format is implementation dependent. In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # Finish with a newline unless the root's tail already supplied one.
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1187
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188# --------------------------------------------------------------------
1189# parsing
Armin Rigo9ed73062005-12-14 18:10:45 +00001190
Armin Rigo9ed73062005-12-14 18:10:45 +00001191
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    document = ElementTree()
    # ElementTree.parse() loads the data into the (initially empty) tree.
    document.parse(source, parser)
    return document
1204
Armin Rigo9ed73062005-12-14 18:10:45 +00001205
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is called with. The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information). If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs. Its ``root``
    attribute is None until parsing has finished and is then set to the
    root element. A *source* opened here (a file name) is closed when the
    underlying generator finishes.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    puller = XMLPullParser(events=events, _parser=parser)

    def iterator():
        try:
            while True:
                # Hand out whatever is already queued ...
                yield from puller.read_events()
                # ... then load the next chunk into the event buffer.
                chunk = source.read(16 * 1024)
                if not chunk:
                    break
                puller.feed(chunk)
            root = puller._close_and_return_root()
            yield from puller.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    # The generator body does not run until the first __next__ call, so
    # the names it closes over (it, source, close_source) may be bound
    # further down.
    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = not hasattr(source, "read")
    if close_source:
        source = open(source, "rb")

    return it
Armin Rigo9ed73062005-12-14 18:10:45 +00001252
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001253
class XMLPullParser:
    """Parser that is fed data and queues events for later retrieval.

    *events* is the sequence of events to report; when omitted, only
    "end" events are reported.
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        requested = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, requested)

    def feed(self, data):
        """Feed encoded data to parser."""
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            self._parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so it surfaces, in order, from read_events().
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator. A queued exception is raised when
        it is reached.
        """
        pending = self._events_queue
        while pending:
            item = pending.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001305
1306
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Any falsy *parser* (not only None) selects the default parser.
    active_parser = parser if parser else XMLParser(target=TreeBuilder())
    active_parser.feed(text)
    return active_parser.close()
1322
Armin Rigo9ed73062005-12-14 18:10:45 +00001323
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements. Elements whose "id" attribute is
    missing or empty are left out; for a repeated id the last element in
    iteration order wins.

    """
    # Any falsy *parser* (not only None) selects the default parser.
    active_parser = parser if parser else XMLParser(target=TreeBuilder())
    active_parser.feed(text)
    tree = active_parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        if not elem_id:
            continue
        ids[elem_id] = elem
    return tree, ids
1344
# fromstring() parses an XML document from a string constant; it is an
# alias for XML().
fromstring = XML
Armin Rigo9ed73062005-12-14 18:10:45 +00001347
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, each of which is
    fed to the parser in order, *parser* is an optional parser
    instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Any falsy *parser* (not only None) selects the default parser.
    active_parser = parser if parser else XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active_parser.feed(fragment)
    return active_parser.close()
1362
1363# --------------------------------------------------------------------
Armin Rigo9ed73062005-12-14 18:10:45 +00001364
Armin Rigo9ed73062005-12-14 18:10:45 +00001365
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    *comment_factory* is a factory to create comments to be used instead of
    the standard factory.  If *insert_comments* is false (the default),
    comments will not be inserted into the tree.

    *pi_factory* is a factory to create processing instructions to be used
    instead of the standard factory.  If *insert_pis* is false (the default),
    processing instructions will not be inserted into the tree.
    """

    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []  # text fragments collected since the last flush
        self._elem = []  # stack of currently open elements
        self._last = None  # element most recently opened, closed or inserted
        self._root = None  # first element opened at top level
        self._tail = None  # true if we're after an end tag
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = (
            Element if element_factory is None else element_factory)

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the buffered character data to the last element, as its
        # tail when an end tag has been seen and as its text otherwise.
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        if self._elem:
            self._elem[-1].append(opened)
        elif self._root is None:
            self._root = opened
        self._elem.append(opened)
        self._tail = 0
        return opened

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        *text* is the text of the comment.
        """
        return self._handle_single(
            self._comment_factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        *target* is the target name of the processing instruction.
        *text* is the data of the processing instruction, or ''.
        """
        return self._handle_single(
            self._pi_factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Build a childless node (comment or PI). It is always returned,
        # but only linked into the tree when *insert* is true.
        node = factory(*args)
        if not insert:
            return node
        self._flush()
        self._last = node
        if self._elem:
            self._elem[-1].append(node)
        self._tail = 1
        return node
1485
Armin Rigo9ed73062005-12-14 18:10:45 +00001486
Eli Bendersky84fae782013-03-09 07:12:48 -08001487# also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, *, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: expat reports qualified names as
        # "uri}local", which _fixname() turns into "{uri}local".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        # Keep the error class of whichever expat module was imported so
        # that the other methods never have to import expat again.
        self._error = expat.error
        self._names = {}  # name memo cache
        # main callbacks
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'start_ns'):
            parser.StartNamespaceDeclHandler = self._start_ns
        if hasattr(target, 'end_ns'):
            parser.EndNamespaceDeclHandler = self._end_ns
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor).
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each handler binds the values it needs as default arguments so that
        # it captures the current loop values rather than the loop variables.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                parser.CommentHandler = handler
            elif event_name == 'pi':
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, carrying over its error
        # code and its (line, column) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start_ns(self, prefix, uri):
        # Forward a namespace declaration, mapping None to ''.
        return self.target.start_ns(prefix or '', uri or '')

    def _end_ns(self, prefix):
        # Forward the end of a namespace scope, mapping None to ''.
        return self.target.end_ns(prefix or '')

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            # Even positions hold names, odd positions the matching values.
            for name, value in zip(attr_list[::2], attr_list[1::2]):
                attrib[fixname(name)] = value
        return self.target.start(tag, attrib)

    def _end(self, tag):
        # Handler for expat's EndElementHandler.
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Handler for everything no specific handler consumed.  It resolves
        # entity references through self.entity and collects the pieces of
        # a doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            # Only the lookup may signal an undefined entity; a KeyError
            # raised by the target's own data() must not be mistaken for one.
            try:
                replacement = self.entity[text[1:-1]]
            except KeyError:
                # Use the error class stored by __init__ instead of importing
                # xml.parsers.expat again, which would break the pyexpat
                # fallback handled there.
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                     self.parser.ErrorColumnNumber)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
            data_handler(replacement)
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, _, pubid, system = self._doctype
                    if pubid:
                        # strip the surrounding quotes
                        pubid = pubid[1:-1]
                elif kind == "SYSTEM" and n == 3:
                    name, _, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif hasattr(self, "doctype"):
                    warnings.warn(
                        "The doctype() method of XMLParser is ignored.  "
                        "Define doctype() method on the TreeBuilder target.",
                        RuntimeWarning)

                self._doctype = None

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, False)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", True)  # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001713
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01001714
Stefan Behnele1d5dd62019-05-01 22:34:13 +02001715# --------------------------------------------------------------------
1716# C14N 2.0
1717
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    Input is taken from *xml_data* (an XML string) or, when that is None,
    from *from_file* (a file path or file-like object); at least one of the
    two must be provided.

    If *out* is provided, it must be a file or file-like object whose
    ``.write()`` method receives the serialised canonical XML as text (not
    bytes), and the function returns None.  To write to a file, open it in
    text mode with encoding "utf-8".  Without *out*, the output is returned
    as a text string.

    The remaining keyword *options* are passed on to ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory only when the caller gave no sink.
    buffer = io.StringIO() if out is None else None
    sink = out if buffer is None else buffer

    writer = C14NWriterTarget(sink.write, **options)
    parser = XMLParser(target=writer)

    if xml_data is None:
        # from_file cannot be None here because of the check above.
        parse(from_file, parser=parser)
    else:
        parser.feed(xml_data)
        parser.close()

    if buffer is None:
        return None
    return buffer.getvalue()
1746
1747
1748_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
1749
1750
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Bound method: called with an attribute dict it returns the
            # qname-aware attribute names present in that dict.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        # Arguments of a start tag whose output is deferred until its text
        # content is known (only used for qname-aware tags), or None.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        # Nesting depth inside an excluded element; 0 when not excluded.
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        # Yield the (uri, prefix) pairs of *ns_stack*, innermost level first.
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        # Turn "prefix:name" into "{uri}name" using the prefixes in scope.
        # Raises ValueError when the prefix is not declared.
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        # Return (serialised name, local name, uri) for *qname*, declaring
        # a namespace on the current element if none is usable yet.
        # Raises ValueError when the namespace is not declared in scope.
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # A prefix seen at an inner level shadows the same prefix further out.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        # Buffer text; text inside an excluded element is dropped.
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        # Write out a deferred start tag (if any) followed by the buffered
        # text.  Safe to call with no buffered text.
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() already wrote the text in its resolved form.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        # A deferred start tag must also be written first; otherwise the new
        # declaration would be recorded on the deferred element's own level.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Flush even without buffered text when a parent's start tag is still
        # deferred, so that the parent is written before this child.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        # Write the start tag for *tag*.  *new_namespaces* is the list that
        # collects namespaces declared on this element.  *qname_text*, when
        # given, is "prefix:name" text content that is written in resolved
        # form right after the tag.
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname-aware element has no text to trigger the flush, so
        # check for a deferred start tag explicitly; otherwise only the end
        # tag would be written and the stacks below would be popped once
        # too often.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the root element is closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (
                self._root_seen and self._data):
            # Write a deferred start tag and buffered text first so that the
            # comment stays inside its element and after the preceding text.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (
                self._root_seen and self._data):
            # Write a deferred start tag and buffered text first so that the
            # instruction stays inside its element and after the preceding
            # text.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
2002
2003
2004def _escape_cdata_c14n(text):
2005 # escape character data
2006 try:
2007 # it's worth avoiding do-nothing calls for strings that are
2008 # shorter than 500 character, or so. assume that's, by far,
2009 # the most common case in most applications.
2010 if '&' in text:
2011 text = text.replace('&', '&amp;')
2012 if '<' in text:
2013 text = text.replace('<', '&lt;')
2014 if '>' in text:
2015 text = text.replace('>', '&gt;')
2016 if '\r' in text:
2017 text = text.replace('\r', '&#xD;')
2018 return text
2019 except (TypeError, AttributeError):
2020 _raise_serialization_error(text)
2021
2022
2023def _escape_attrib_c14n(text):
2024 # escape attribute value
2025 try:
2026 if '&' in text:
2027 text = text.replace('&', '&amp;')
2028 if '<' in text:
2029 text = text.replace('<', '&lt;')
2030 if '"' in text:
2031 text = text.replace('"', '&quot;')
2032 if '\t' in text:
2033 text = text.replace('\t', '&#x9;')
2034 if '\n' in text:
2035 text = text.replace('\n', '&#xA;')
2036 if '\r' in text:
2037 text = text.replace('\r', '&#xD;')
2038 return text
2039 except (TypeError, AttributeError):
2040 _raise_serialization_error(text)
2041
2042
2043# --------------------------------------------------------------------
2044
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01002045# Import the C accelerators
# Element is going to be shadowed by the C implementation. We need to keep
# the Python version of it accessible for some "creative" use by external
# code (see tests).
_Element_Py = Element

try:
    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # No C accelerator available: keep the pure Python definitions.
    pass
else:
    # Hand the Comment and ProcessingInstruction factories to the C module.
    _set_factories(Comment, ProcessingInstruction)