blob: beb2d68803a52df3e518c64510663a969a27b91d [file] [log] [blame]
"""Lightweight XML support for Python.

 XML is an inherently hierarchical data format, and the most natural way to
 represent it is with a tree.  This module has two classes for this purpose:

    1. ElementTree represents the whole XML document as a tree and

    2. Element represents a single node in this tree.

 Interactions with the whole document (reading and writing to/from files) are
 usually done on the ElementTree level.  Interactions with a single XML element
 and its sub-elements are done on the Element level.

 Element is a flexible container object designed to store hierarchical data
 structures in memory. It can be described as a cross between a list and a
 dictionary.  Each Element has a number of properties associated with it:

    'tag' - a string containing the element's name.

    'attrib' - a Python dictionary storing the element's attributes.

    'text' - a string containing the element's text content.

    'tail' - an optional string containing text after the element's end tag.

    And a number of child elements stored in a Python sequence.

 To create an element instance, use the Element constructor,
 or the SubElement factory function.

 You can also use the ElementTree class to wrap an element structure
 and convert it to and from XML.

"""
35
Eli Benderskybf05df22013-04-20 05:44:01 -070036#---------------------------------------------------------------------
37# Licensed to PSF under a Contributor Agreement.
38# See http://www.python.org/psf/license for licensing details.
Armin Rigo9ed73062005-12-14 18:10:45 +000039#
40# ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +000041# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
Armin Rigo9ed73062005-12-14 18:10:45 +000042#
43# fredrik@pythonware.com
44# http://www.pythonware.com
Armin Rigo9ed73062005-12-14 18:10:45 +000045# --------------------------------------------------------------------
46# The ElementTree toolkit is
47#
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048# Copyright (c) 1999-2008 by Fredrik Lundh
Armin Rigo9ed73062005-12-14 18:10:45 +000049#
50# By obtaining, using, and/or copying this software and/or its
51# associated documentation, you agree that you have read, understood,
52# and will comply with the following terms and conditions:
53#
54# Permission to use, copy, modify, and distribute this software and
55# its associated documentation for any purpose and without fee is
56# hereby granted, provided that the above copyright notice appears in
57# all copies, and that both that copyright notice and this permission
58# notice appear in supporting documentation, and that the name of
59# Secret Labs AB or the author not be used in advertising or publicity
60# pertaining to distribution of the software without specific, written
61# prior permission.
62#
63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70# OF THIS SOFTWARE.
71# --------------------------------------------------------------------
72
# Names exported by ``from <this module> import *``.  Every entry must be
# defined somewhere in this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "indent", "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser", "XMLPullParser",
    "register_namespace",
    "canonicalize", "C14NWriterTarget",
    ]

# Version string, exported through __all__ above.
VERSION = "1.3.0"
94
Florent Xiclunaf15351d2010-03-13 23:24:31 +000095import sys
96import re
97import warnings
Eli Bendersky00f402b2012-07-15 06:02:22 +030098import io
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +020099import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +0300100import collections.abc
Eli Bendersky00f402b2012-07-15 06:02:22 +0300101import contextlib
Armin Rigo9ed73062005-12-14 18:10:45 +0000102
Eli Bendersky27cbb192012-06-15 09:03:19 +0300103from . import ElementPath
Armin Rigo9ed73062005-12-14 18:10:45 +0000104
Armin Rigo9ed73062005-12-14 18:10:45 +0000105
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, code that raises a ParseError is
    expected to attach two extra attributes to the instance:

        'code' - the specific exception code
        'position' - the line and column of the error

    """
116
117# --------------------------------------------------------------------
118
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000119
def iselement(element):
    """Return True if *element* looks like an Element.

    The check is purely structural: any object exposing a 'tag' attribute
    qualifies.

    """
    try:
        element.tag
    except AttributeError:
        return False
    return True
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000123
Armin Rigo9ed73062005-12-14 18:10:45 +0000124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement. This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag.  This is either a string or the value None.  Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        # Build a fresh dict every time: neither the caller's mapping nor the
        # shared default argument is ever aliased by the element.
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the child (or slice of children) at *index*.

        For a slice, *element* may be any iterable of elements, including a
        one-shot iterator.  TypeError is raised, and nothing is changed, if
        any item is not an Element.

        """
        if isinstance(index, slice):
            # Materialize first: validating a generator in place would
            # exhaust it and the assignment below would then store nothing.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is any iterable with zero or more elements; one-shot
        iterators such as generators are supported.  TypeError is raised,
        and nothing is appended, if any item is not an Element.

        """
        # Snapshot the iterable so that validation does not consume it
        # before the children list gets to see the items.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        # assert iselement(element)
        self._children.remove(subelement)

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was not
        found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(); the result is whatever the attribute
        dictionary returns for that call.

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(); the result is whatever the attribute
        dictionary returns for that call.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        the string "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.  Nothing is yielded for an element
        whose tag is neither a string nor None.

        """
        tag = self.tag
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
Armin Rigo9ed73062005-12-14 18:10:45 +0000417
Armin Rigo9ed73062005-12-14 18:10:45 +0000418
def SubElement(parent, tag, attrib={}, **extra):
    """Create a new element and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    Return the newly created child, built with parent.makeelement().

    """
    child = parent.makeelement(tag, {**attrib, **extra})
    parent.append(child)
    return child
435
Armin Rigo9ed73062005-12-14 18:10:45 +0000436
def Comment(text=None):
    """Build a comment element.

    The returned element uses this factory function itself as its tag,
    which is how the standard serializer recognizes it and writes it out as
    an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
449
Armin Rigo9ed73062005-12-14 18:10:45 +0000450
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    This function creates a special element, tagged with this factory
    function itself, which the standard serializer serializes as an XML
    processing instruction.

    *target* is a string containing the processing instruction target,
    *text* is a string containing the processing instruction contents, if
    any.  The element's text is *target*, followed by a space and *text*
    when *text* is non-empty.

    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        element.text = target + " " + text
    return element

# Short alias, exported alongside the full name.
PI = ProcessingInstruction
468
Armin Rigo9ed73062005-12-14 18:10:45 +0000469
class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value so that it gets proper namespace handling
    on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparisons and hashing are delegated to the wrapped text, so a QName
    compares equal to another QName or to a plain value with the same text.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    @staticmethod
    def _operand(other):
        # Compare against the wrapped text of another QName, or against the
        # other object itself when it is not a QName.
        return other.text if isinstance(other, QName) else other

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __le__(self, other):
        return self.text <= self._operand(other)

    def __lt__(self, other):
        return self.text < self._operand(other)

    def __ge__(self, other):
        return self.text >= self._operand(other)

    def __gt__(self, other):
        return self.text > self._operand(other)

    def __eq__(self, other):
        return self.text == self._operand(other)
Armin Rigo9ed73062005-12-14 18:10:45 +0000514
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000515# --------------------------------------------------------------------
516
Armin Rigo9ed73062005-12-14 18:10:45 +0000517
518class ElementTree:
Eli Bendersky84fae782013-03-09 07:12:48 -0800519 """An XML element hierarchy.
Armin Rigo9ed73062005-12-14 18:10:45 +0000520
Eli Bendersky84fae782013-03-09 07:12:48 -0800521 This class also provides support for serialization to and from
522 standard XML.
523
524 *element* is an optional root element node,
525 *file* is an optional file handle or file name of an XML file whose
526 contents will be used to initialize the tree with.
527
528 """
Armin Rigo9ed73062005-12-14 18:10:45 +0000529 def __init__(self, element=None, file=None):
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000530 # assert element is None or iselement(element)
Armin Rigo9ed73062005-12-14 18:10:45 +0000531 self._root = element # first node
532 if file:
533 self.parse(file)
534
Armin Rigo9ed73062005-12-14 18:10:45 +0000535 def getroot(self):
Eli Bendersky84fae782013-03-09 07:12:48 -0800536 """Return root element of this tree."""
Armin Rigo9ed73062005-12-14 18:10:45 +0000537 return self._root
538
Armin Rigo9ed73062005-12-14 18:10:45 +0000539 def _setroot(self, element):
Eli Bendersky84fae782013-03-09 07:12:48 -0800540 """Replace root element of this tree.
541
542 This will discard the current contents of the tree and replace it
543 with the given element. Use with care!
544
545 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000546 # assert iselement(element)
Armin Rigo9ed73062005-12-14 18:10:45 +0000547 self._root = element
548
Armin Rigo9ed73062005-12-14 18:10:45 +0000549 def parse(self, source, parser=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800550 """Load external XML document into element tree.
551
552 *source* is a file name or file object, *parser* is an optional parser
553 instance that defaults to XMLParser.
554
555 ParseError is raised if the parser fails to parse the document.
556
557 Returns the root element of the given source document.
558
559 """
Antoine Pitroue033e062010-10-29 10:38:18 +0000560 close_source = False
Armin Rigo9ed73062005-12-14 18:10:45 +0000561 if not hasattr(source, "read"):
562 source = open(source, "rb")
Antoine Pitroue033e062010-10-29 10:38:18 +0000563 close_source = True
564 try:
Eli Benderskya3699232013-05-19 18:47:23 -0700565 if parser is None:
566 # If no parser was specified, create a default XMLParser
567 parser = XMLParser()
568 if hasattr(parser, '_parse_whole'):
569 # The default XMLParser, when it comes from an accelerator,
570 # can define an internal _parse_whole API for efficiency.
571 # It can be used to parse the whole source without feeding
572 # it with chunks.
573 self._root = parser._parse_whole(source)
574 return self._root
575 while True:
Antoine Pitroue033e062010-10-29 10:38:18 +0000576 data = source.read(65536)
577 if not data:
578 break
579 parser.feed(data)
580 self._root = parser.close()
581 return self._root
582 finally:
583 if close_source:
584 source.close()
Armin Rigo9ed73062005-12-14 18:10:45 +0000585
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000586 def iter(self, tag=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800587 """Create and return tree iterator for the root element.
588
589 The iterator loops over all elements in this tree, in document order.
590
591 *tag* is a string with the tag name to iterate over
592 (default is to return all elements).
593
594 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000595 # assert self._root is not None
596 return self._root.iter(tag)
597
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000598 def find(self, path, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800599 """Find first matching element by tag name or path.
600
601 Same as getroot().find(path), which is Element.find()
602
603 *path* is a string having either an element tag or an XPath,
604 *namespaces* is an optional mapping from namespace prefix to full name.
605
606 Return the first matching element, or None if no element was found.
607
608 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000609 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000610 if path[:1] == "/":
611 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000612 warnings.warn(
613 "This search is broken in 1.3 and earlier, and will be "
614 "fixed in a future version. If you rely on the current "
615 "behaviour, change it to %r" % path,
616 FutureWarning, stacklevel=2
617 )
618 return self._root.find(path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000619
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000620 def findtext(self, path, default=None, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800621 """Find first matching element by tag name or path.
622
623 Same as getroot().findtext(path), which is Element.findtext()
624
625 *path* is a string having either an element tag or an XPath,
626 *namespaces* is an optional mapping from namespace prefix to full name.
627
628 Return the first matching element, or None if no element was found.
629
630 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000631 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000632 if path[:1] == "/":
633 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000634 warnings.warn(
635 "This search is broken in 1.3 and earlier, and will be "
636 "fixed in a future version. If you rely on the current "
637 "behaviour, change it to %r" % path,
638 FutureWarning, stacklevel=2
639 )
640 return self._root.findtext(path, default, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000641
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000642 def findall(self, path, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800643 """Find all matching subelements by tag name or path.
644
645 Same as getroot().findall(path), which is Element.findall().
646
647 *path* is a string having either an element tag or an XPath,
648 *namespaces* is an optional mapping from namespace prefix to full name.
649
650 Return list containing all matching elements in document order.
651
652 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000653 # assert self._root is not None
Armin Rigo9ed73062005-12-14 18:10:45 +0000654 if path[:1] == "/":
655 path = "." + path
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000656 warnings.warn(
657 "This search is broken in 1.3 and earlier, and will be "
658 "fixed in a future version. If you rely on the current "
659 "behaviour, change it to %r" % path,
660 FutureWarning, stacklevel=2
661 )
662 return self._root.findall(path, namespaces)
663
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000664 def iterfind(self, path, namespaces=None):
Eli Bendersky84fae782013-03-09 07:12:48 -0800665 """Find all matching subelements by tag name or path.
666
667 Same as getroot().iterfind(path), which is element.iterfind()
668
669 *path* is a string having either an element tag or an XPath,
670 *namespaces* is an optional mapping from namespace prefix to full name.
671
672 Return an iterable yielding all matching elements in document order.
673
674 """
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000675 # assert self._root is not None
676 if path[:1] == "/":
677 path = "." + path
678 warnings.warn(
679 "This search is broken in 1.3 and earlier, and will be "
680 "fixed in a future version. If you rely on the current "
681 "behaviour, change it to %r" % path,
682 FutureWarning, stacklevel=2
683 )
684 return self._root.iterfind(path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000685
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None, *,
          short_empty_elements=True):
    """Write element tree to a file as XML.

    Arguments:
      *file_or_filename* -- file name or a file object opened for writing

      *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                    when *method* is "c14n")

      *xml_declaration* -- bool indicating if an XML declaration should be
                           added to the output. If None, an XML declaration
                           is added if encoding IS NOT either of:
                           US-ASCII, UTF-8, or Unicode.  A declaration is
                           only ever written for the "xml" method.

      *default_namespace* -- sets the default XML namespace (for "xmlns")

      *method* -- either "xml" (default), "html", "text", or "c14n"

      *short_empty_elements* -- controls the formatting of elements
                                that contain no content. If True (default)
                                they are emitted as a single self-closed
                                tag, otherwise they are emitted as a pair
                                of start/end tags

    Raises ValueError if *method* is not a known output method.

    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)
    if not encoding:
        encoding = "utf-8" if method == "c14n" else "us-ascii"
    enc_lower = encoding.lower()
    with _get_writer(file_or_filename, enc_lower) as write:
        if method == "xml":
            if xml_declaration is None:
                # Unspecified: declare only encodings a reader could not
                # safely assume.
                declare = enc_lower not in ("utf-8", "us-ascii", "unicode")
            else:
                declare = xml_declaration
            if declare:
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                else:
                    declared_encoding = encoding
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
        if method == "text":
            _serialize_text(write, self._root)
        else:
            qnames, namespaces = _namespaces(self._root, default_namespace)
            serializer = _serialize[method]
            serializer(write, self._root, qnames, namespaces,
                       short_empty_elements=short_empty_elements)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000743
def write_c14n(self, file):
    """Write the tree to *file* using the "c14n" output method.

    Thin wrapper around write(file, method="c14n").
    """
    # lxml.etree compatibility.  use output method instead
    return self.write(file, method="c14n")
Armin Rigo9ed73062005-12-14 18:10:45 +0000747
748# --------------------------------------------------------------------
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000749# serialization support
750
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding a text ``write`` callable for the target.

    *file_or_filename* is either an object with a ``write`` attribute or
    something accepted by open().  *encoding* is compared against the
    literal "unicode": in that case the target is used as a text writer
    as is, otherwise output is encoded with *encoding* and characters
    that cannot be encoded are written as XML character references.

    Resources created here are released when the ``with`` block exits; a
    caller-supplied file object is detached from the temporary wrappers
    rather than closed.
    """
    # returns text write method and release all resources after using
    try:
        write = file_or_filename.write
    except AttributeError:
        # file_or_filename is a file name
        if encoding == "unicode":
            # No encoding is passed, so open() picks its own default.
            file = open(file_or_filename, "w")
        else:
            file = open(file_or_filename, "w", encoding=encoding,
                        errors="xmlcharrefreplace")
        # The file was opened here, so it is also closed here.
        with file:
            yield file.write
    else:
        # file_or_filename is a file-like object
        # encoding determines if it is a text or binary writer
        if encoding == "unicode":
            # use a text writer as is
            yield write
        else:
            # wrap a binary writer with TextIOWrapper
            with contextlib.ExitStack() as stack:
                if isinstance(file_or_filename, io.BufferedIOBase):
                    file = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    file = io.BufferedWriter(file_or_filename)
                    # Keep the original file open when the BufferedWriter is
                    # destroyed
                    stack.callback(file.detach)
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method
                    file = io.BufferedIOBase()
                    file.writable = lambda: True
                    file.write = write
                    try:
                        # TextIOWrapper uses these methods to determine
                        # if BOM (for UTF-16, etc) should be added
                        file.seekable = file_or_filename.seekable
                        file.tell = file_or_filename.tell
                    except AttributeError:
                        pass
                file = io.TextIOWrapper(file,
                                        encoding=encoding,
                                        errors="xmlcharrefreplace",
                                        newline="\n")
                # Keep the original file open when the TextIOWrapper is
                # destroyed.  ExitStack runs callbacks in reverse order, so
                # this detach happens before the BufferedWriter one above.
                stack.callback(file.detach)
                yield file.write
802
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and its descendants, looking at tags, attribute names,
    and any QName instances used as attribute values or element text.

    Returns a ``(qnames, namespaces)`` pair: *qnames* maps each name to
    its serialized "prefix:local" form (or the bare local name), and
    *namespaces* maps namespace URIs to prefixes.  When *default_namespace*
    is given it is mapped to the empty prefix, and any non-qualified name
    raises ValueError.  Names or tags of an unsupported type are reported
    through _raise_serialization_error().
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # Prefer a registered prefix, else invent "nsN".
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            # An empty prefix means the default namespace: no "prefix:".
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    def note(name):
        # Register *name* unless it has already been seen.
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            note(tag.text)
        elif isinstance(tag, str):
            note(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            note(key)
            if isinstance(value, QName):
                note(value.text)
        text = node.text
        if isinstance(text, QName):
            note(text.text)
    return qnames, namespaces
863
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Serialize *elem* and its subtree as XML by calling *write*.

    *write* receives each output fragment.  *qnames* maps tags and
    attribute names to their serialized form; a tag mapped to None emits
    no markup of its own, only its text and children.  *namespaces* maps
    URIs to prefixes and is written as xmlns attributes on this element
    only (children are serialized with None).  If *short_empty_elements*
    is true, an element with no text and no children is written as a
    single "<tag />".  The element's tail, when set, is written last.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % text)
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[raw_tag]
        if name is None:
            # No markup for this element: only its content.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            has_content = text or len(elem) or not short_empty_elements
            if not has_content:
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
913
# Lower-case names of HTML elements that never get an end tag;
# _serialize_html() checks membership before writing "</tag>".
# A set literal replaces the former tuple + try/except NameError fallback:
# the built-in set always exists, so the fallback could never run.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}
921
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Serialize *elem* and its subtree as HTML by calling *write*.

    Same arguments as the XML serializer, with these differences visible
    in the code: comment and processing-instruction text is escaped,
    attribute values use the HTML attribute escaping, the text of
    "script" and "style" elements is written unescaped, start tags are
    never self-closed, and no end tag is written for elements whose
    lower-cased name is in HTML_EMPTY.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[raw_tag]
        if name is None:
            # No markup for this element: only its content.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            ltag = name.lower()
            if text:
                raw_text = ltag == "script" or ltag == "style"
                write(text if raw_text else _escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
971
def _serialize_text(write, elem):
    """Write the text content of *elem* through *write*.

    Every string produced by elem.itertext() is passed to *write* in
    turn, followed by the element's own tail if it is set.
    """
    for chunk in elem.itertext():
        write(chunk)
    if not elem.tail:
        return
    write(elem.tail)
977
# Dispatch table from output method name to serializer function;
# ElementTree.write() validates its *method* argument against these keys.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
Armin Rigo9ed73062005-12-14 18:10:45 +0000985
Armin Rigo9ed73062005-12-14 18:10:45 +0000986
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix has the reserved form "ns<digits>".

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect first, delete afterwards: the registry must not change size
    # while it is being iterated.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1005
# Global registry mapping namespace URIs to their preferred prefixes.
# register_namespace() mutates it and the serializer consults it when it
# meets a URI that has no prefix yet.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001020
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized.

    The message contains the repr of *text* and the name of its type.
    """
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
1025
def _escape_cdata(text):
    """Return *text* with "&", "<" and ">" replaced by entity references.

    A value that does not support the needed string operations is
    reported through _raise_serialization_error().
    """
    # "&" must come first so the entities added afterwards are not
    # escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    try:
        # Test before replacing: avoiding do-nothing replace() calls is
        # worthwhile for the short strings that dominate most documents.
        for char, entity in substitutions:
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1041
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    Besides "&", "<", ">" and the double quote, line ends and tabs are
    written as character references.  A value that does not support the
    needed string operations is reported through
    _raise_serialization_error().
    """
    # Applied strictly in this order:
    #  * "&" first, so entities added later are not escaped again;
    #  * CR LF and CR are folded to LF (section 2.11 of the XML
    #    specification, http://www.w3.org/TR/REC-xml/#sec-line-ends);
    #  * LF and TAB then become character references (issue 17582).
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("\n", "&#10;"),
        ("\t", "&#09;"),
    )
    try:
        for needle, replacement in substitutions:
            if needle in text:
                text = text.replace(needle, replacement)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1069
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    Only "&", ">" and the double quote are replaced; "<" is left as is.
    A value that does not support the needed string operations is
    reported through _raise_serialization_error().
    """
    # "&" must come first so the entities added afterwards are not
    # escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
    )
    try:
        for char, entity in substitutions:
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1082
1083# --------------------------------------------------------------------
1084
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
    sets the default XML namespace (for "xmlns").  *xml_declaration* and
    *short_empty_elements* are passed through to ElementTree.write().

    Returns an (optionally) encoded string containing the XML data.

    """
    # A text buffer for "unicode", a bytes buffer for real encodings.
    if encoding == 'unicode':
        stream = io.StringIO()
    else:
        stream = io.BytesIO()
    tree = ElementTree(element)
    tree.write(stream, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return stream.getvalue()
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001108
class _ListDataStream(io.BufferedIOBase):
    """An auxiliary stream accumulating into a list reference."""
    def __init__(self, lst):
        # *lst* is stored by reference, so the caller's list receives
        # every chunk that is written.
        self.lst = lst

    def writable(self):
        return True

    def seekable(self):
        return True

    def write(self, b):
        # Each call appends one item; chunks are never joined here.
        self.lst.append(b)

    def tell(self):
        # The position is reported as the number of chunks written so far.
        return len(self.lst)
1125
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Generate the serialization of *element* as a list of fragments.

    Takes the same arguments as tostring().  The tree is written into a
    stream that appends each chunk it receives to a list, and that list
    is returned.
    """
    chunks = []
    ElementTree(element).write(_ListDataStream(chunks), encoding,
                               xml_declaration=xml_declaration,
                               default_namespace=default_namespace,
                               method=method,
                               short_empty_elements=short_empty_elements)
    return chunks
Armin Rigo9ed73062005-12-14 18:10:45 +00001137
Armin Rigo9ed73062005-12-14 18:10:45 +00001138
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # Finish with a newline unless the root's tail already supplied one.
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1156
Stefan Behnelb5d3cee2019-08-23 16:44:25 +02001157
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The (root) element
    itself will not be changed, but the tail text of all elements in its
    subtree will be adapted.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level. Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  Text and tails that contain
    anything other than whitespace are left untouched.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # Nothing to indent: the root has no children.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # Index 0 holds the indentation of the root itself (at *level*);
    # deeper entries are appended on demand.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # *level* is relative to the root passed to indent(), so it
        # indexes directly into *indentations*.
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
1206
1207
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208# --------------------------------------------------------------------
1209# parsing
Armin Rigo9ed73062005-12-14 18:10:45 +00001210
Armin Rigo9ed73062005-12-14 18:10:45 +00001211
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Both arguments are handed unchanged to ElementTree.parse() on a newly
    created, empty tree.

    Return an ElementTree instance.

    """
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
1224
Armin Rigo9ed73062005-12-14 18:10:45 +00001225
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    The returned iterator also reports what's going on to the user based
    on the *events* it is initialized with.  The supported events are the
    strings "start", "end", "start-ns" and "end-ns" (the "ns" events are
    used to get detailed namespace information).  If *events* is omitted,
    only "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    A *source* without a ``read`` attribute is opened here in binary mode
    and closed again when the underlying generator finishes.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until the whole input has been parsed and then
    holds the value returned by the parser on close.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)
    def iterator():
        # Nothing in this body runs until the first __next__() call, so
        # *source*, *close_source* and *it* (all bound further down) are
        # already defined by the time they are used here.
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    # Wrap the generator in an Iterator subclass instance so that a
    # ``root`` attribute can be set on the returned object.
    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
Armin Rigo9ed73062005-12-14 18:10:45 +00001272
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001273
class XMLPullParser:
    """Non-blocking parser: feed it data, then pull the resulting events.

    *events* is a sequence of event names to report; when omitted only
    "end" events are reported.
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        queue = collections.deque()
        self._events_queue = queue
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        reported = ("end",) if events is None else events
        self._parser._setevents(queue, reported)

    def feed(self, data):
        """Feed encoded data to parser."""
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            self._parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so read_events() raises it in sequence,
            # after the events that preceded it.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser = self._parser
        root = parser.close()
        # Marks the end of the stream; feed() refuses further data.
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        pending = self._events_queue
        while pending:
            item = pending.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001325
1326
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # A fresh tree-building parser is created when none is supplied.
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    return root
1342
Armin Rigo9ed73062005-12-14 18:10:45 +00001343
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.  Elements whose "id" attribute
    is missing or empty are not included; for a repeated id the last
    element in document order is kept.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {
        ident: node
        for node in tree.iter()
        if (ident := node.get("id"))
    }
    return tree, ids
1364
# Parse XML document from string constant.  Alias for XML(): both names
# refer to the same function object.
fromstring = XML
Armin Rigo9ed73062005-12-14 18:10:45 +00001367
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    # Fragments are fed one at a time, in order.
    feed = parser.feed
    for fragment in sequence:
        feed(fragment)
    return parser.close()
1382
1383# --------------------------------------------------------------------
Armin Rigo9ed73062005-12-14 18:10:45 +00001384
Armin Rigo9ed73062005-12-14 18:10:45 +00001385
1386class TreeBuilder:
Eli Bendersky84fae782013-03-09 07:12:48 -08001387 """Generic element structure builder.
Armin Rigo9ed73062005-12-14 18:10:45 +00001388
Eli Bendersky84fae782013-03-09 07:12:48 -08001389 This builder converts a sequence of start, data, and end method
1390 calls to a well-formed element structure.
1391
1392 You can use this class to build an element structure using a custom XML
1393 parser, or a parser for some other XML-like format.
1394
1395 *element_factory* is an optional element factory which is called
1396 to create new Element instances, as necessary.
1397
Stefan Behnel43851a22019-05-01 21:20:38 +02001398 *comment_factory* is a factory to create comments to be used instead of
1399 the standard factory. If *insert_comments* is false (the default),
1400 comments will not be inserted into the tree.
1401
1402 *pi_factory* is a factory to create processing instructions to be used
1403 instead of the standard factory. If *insert_pis* is false (the default),
1404 processing instructions will not be inserted into the tree.
Eli Bendersky84fae782013-03-09 07:12:48 -08001405 """
Stefan Behnel43851a22019-05-01 21:20:38 +02001406 def __init__(self, element_factory=None, *,
1407 comment_factory=None, pi_factory=None,
1408 insert_comments=False, insert_pis=False):
Armin Rigo9ed73062005-12-14 18:10:45 +00001409 self._data = [] # data collector
1410 self._elem = [] # element stack
1411 self._last = None # last element
Stefan Behnel43851a22019-05-01 21:20:38 +02001412 self._root = None # root element
Armin Rigo9ed73062005-12-14 18:10:45 +00001413 self._tail = None # true if we're after an end tag
Stefan Behnel43851a22019-05-01 21:20:38 +02001414 if comment_factory is None:
1415 comment_factory = Comment
1416 self._comment_factory = comment_factory
1417 self.insert_comments = insert_comments
1418 if pi_factory is None:
1419 pi_factory = ProcessingInstruction
1420 self._pi_factory = pi_factory
1421 self.insert_pis = insert_pis
Armin Rigo9ed73062005-12-14 18:10:45 +00001422 if element_factory is None:
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423 element_factory = Element
Armin Rigo9ed73062005-12-14 18:10:45 +00001424 self._factory = element_factory
1425
Armin Rigo9ed73062005-12-14 18:10:45 +00001426 def close(self):
Eli Bendersky84fae782013-03-09 07:12:48 -08001427 """Flush builder buffers and return toplevel document Element."""
Armin Rigo9ed73062005-12-14 18:10:45 +00001428 assert len(self._elem) == 0, "missing end tags"
Stefan Behnel43851a22019-05-01 21:20:38 +02001429 assert self._root is not None, "missing toplevel element"
1430 return self._root
Armin Rigo9ed73062005-12-14 18:10:45 +00001431
1432 def _flush(self):
1433 if self._data:
1434 if self._last is not None:
Neal Norwitz9d72bb42007-04-17 08:48:32 +00001435 text = "".join(self._data)
Armin Rigo9ed73062005-12-14 18:10:45 +00001436 if self._tail:
1437 assert self._last.tail is None, "internal error (tail)"
1438 self._last.tail = text
1439 else:
1440 assert self._last.text is None, "internal error (text)"
1441 self._last.text = text
1442 self._data = []
1443
Armin Rigo9ed73062005-12-14 18:10:45 +00001444 def data(self, data):
Eli Bendersky84fae782013-03-09 07:12:48 -08001445 """Add text to current element."""
Armin Rigo9ed73062005-12-14 18:10:45 +00001446 self._data.append(data)
1447
Armin Rigo9ed73062005-12-14 18:10:45 +00001448 def start(self, tag, attrs):
Eli Bendersky84fae782013-03-09 07:12:48 -08001449 """Open new element and return it.
1450
1451 *tag* is the element name, *attrs* is a dict containing element
1452 attributes.
1453
1454 """
Armin Rigo9ed73062005-12-14 18:10:45 +00001455 self._flush()
1456 self._last = elem = self._factory(tag, attrs)
1457 if self._elem:
1458 self._elem[-1].append(elem)
Stefan Behnel43851a22019-05-01 21:20:38 +02001459 elif self._root is None:
1460 self._root = elem
Armin Rigo9ed73062005-12-14 18:10:45 +00001461 self._elem.append(elem)
1462 self._tail = 0
1463 return elem
1464
def end(self, tag):
    """Close the current element and return it.

    *tag* is the element name.

    """
    self._flush()
    closed = self._elem.pop()
    self._last = closed
    assert closed.tag == tag, \
        "end tag mismatch (expected %s, got %s)" % (closed.tag, tag)
    # Text that follows is the tail of the element just closed.
    self._tail = 1
    return closed
1478
def comment(self, text):
    """Create a comment using the comment_factory.

    *text* is the text of the comment.
    """
    make_comment = self._comment_factory
    should_insert = self.insert_comments
    return self._handle_single(make_comment, should_insert, text)
1486
def pi(self, target, text=None):
    """Create a processing instruction using the pi_factory.

    *target* is the target name of the processing instruction.
    *text* is the data of the processing instruction, or ''.
    """
    make_pi = self._pi_factory
    should_insert = self.insert_pis
    return self._handle_single(make_pi, should_insert, target, text)
1495
def _handle_single(self, factory, insert, *args):
    """Build a standalone node with *factory* and optionally insert it.

    The node is always created and returned.  Only when *insert* is true
    is pending text flushed, the node recorded as the last node and
    appended to the innermost open element (if any), and following text
    routed to the node's tail.
    """
    node = factory(*args)
    if not insert:
        return node
    self._flush()
    self._last = node
    open_elements = self._elem
    if open_elements:
        open_elements[-1].append(node)
    self._tail = 1
    return node
1505
Armin Rigo9ed73062005-12-14 18:10:45 +00001506
Eli Bendersky84fae782013-03-09 07:12:48 -08001507# also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, *, target=None, encoding=None):
        """Create the expat parser and wire its callbacks to *target*.

        Only the callbacks that *target* actually implements (``start``,
        ``end``, ``start_ns``, ``end_ns``, ``data``, ``comment``, ``pi``)
        are registered with expat.

        Raises ImportError when neither ``xml.parsers.expat`` nor
        ``pyexpat`` can be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in names reported by
        # expat; _fixname() later prepends the matching "{".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        # Exception class caught by feed()/close() and raised by _default().
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'start_ns'):
            parser.StartNamespaceDeclHandler = self._start_ns
        if hasattr(target, 'end_ns'):
            parser.EndNamespaceDeclHandler = self._end_ns
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor).
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each handler binds its event name and helpers as default arguments
        # so that it does not depend on the loop variable after the loop ends.
        # Raises ValueError for an unknown event name.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                parser.CommentHandler = handler
            elif event_name == 'pi':
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise the expat error *value* as a ParseError that carries the
        # same code and a (line, column) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        # Results are memoized in self._names.
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start_ns(self, prefix, uri):
        # Missing prefix/uri values are passed on as ''.
        return self.target.start_ns(prefix or '', uri or '')

    def _end_ns(self, prefix):
        return self.target.end_ns(prefix or '')

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Handler for everything expat has no dedicated callback for.  Used
        # to resolve entities through self.entity and to collect the pieces
        # of a <!DOCTYPE ...> declaration for the target's doctype() method.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                line = self.parser.ErrorLineNumber
                column = self.parser.ErrorColumnNumber
                # Use the error class stored by __init__ rather than
                # re-importing xml.parsers.expat: it honours the pyexpat
                # fallback and is exactly what feed()/close() catch.
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, line, column)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = line
                err.offset = column
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                    if pubid:
                        pubid = pubid[1:-1]  # drop the surrounding quotes
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    # not enough pieces collected yet (or unknown form)
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif hasattr(self, "doctype"):
                    warnings.warn(
                        "The doctype() method of XMLParser is ignored.  "
                        "Define doctype() method on the TreeBuilder target.",
                        RuntimeWarning)

                self._doctype = None

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001733
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01001734
Stefan Behnele1d5dd62019-05-01 22:34:13 +02001735# --------------------------------------------------------------------
1736# C14N 2.0
1737
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input.  When both are given,
    *xml_data* is used.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory only when the caller gave no sink.
    buffer = None
    if out is None:
        buffer = out = io.StringIO()

    writer = C14NWriterTarget(out.write, **options)
    parser = XMLParser(target=writer)

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # The guard above ensures from_file is not None on this branch.
        parse(from_file, parser=parser)

    if buffer is None:
        return None
    return buffer.getvalue()
1766
1767
# Matcher for text that consists solely of a "prefix:name" pair of word
# characters; used to spot QName-like text and attribute values.
_looks_like_prefix_name = re.compile(
    r'^\w+:\w+$',
    re.UNICODE,
).match
1769
1770
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        # (tag, attrs, new_namespaces) of a qname-aware element whose start
        # tag is held back until its text content is known, else None.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, innermost level first."""
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Return ``{uri}name`` for a ``prefix:name`` string.

        Raises ValueError when the prefix is not declared in scope.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return ``(prefixed name, local name, uri)`` for *qname*.

        A namespace declaration is recorded on the current level of the
        declared-namespace stack when the URI has none in scope yet.
        Raises ValueError when no prefix can be found for the URI.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            # A prefix seen earlier (further in) shadows this declaration.
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Buffer text content unless inside an excluded element."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write out a held-back start tag (if any) and the buffered text."""
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() already wrote the text as a resolved qname.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration for the element about to start."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        # A held-back start tag must be written even without buffered text,
        # otherwise this declaration lands on the wrong stack level.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start event.

        The start tag of a qname-aware element is held back until its text
        content (or the next event) arrives.
        """
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Also flush when only a start tag is pending: a qname-aware parent
        # without text must be written before this child.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag of *tag*, its namespaces and its attributes.

        *qname_text*, when given, is the element's ``prefix:name`` text
        content; it is resolved and written right after the start tag.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle an element end event and write the end tag."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname-aware element still has its start tag pending; it
        # must be written (and its stack levels pushed) before closing.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment if *with_comments* was enabled."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (
                self._root_seen and self._data):
            # A held-back start tag has to precede the comment.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction."""
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (
                self._root_seen and self._data):
            # A held-back start tag has to precede the instruction.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
2022
2023
def _escape_cdata_c14n(text):
    # Escape character data for C14N output.  "&" is handled first so the
    # ampersands introduced by the later replacements are not escaped again.
    try:
        # Checking membership before replacing avoids do-nothing replace()
        # calls, which is worthwhile for the short strings that are by far
        # the most common case in most applications.
        for raw, escaped in (
                ('&', '&amp;'),
                ('<', '&lt;'),
                ('>', '&gt;'),
                ('\r', '&#xD;'),
        ):
            if raw in text:
                text = text.replace(raw, escaped)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
2041
2042
def _escape_attrib_c14n(text):
    # Escape an attribute value for C14N output.  "&" is handled first so
    # the ampersands introduced by the later replacements are not escaped
    # again.
    try:
        for raw, escaped in (
                ('&', '&amp;'),
                ('<', '&lt;'),
                ('"', '&quot;'),
                ('\t', '&#x9;'),
                ('\n', '&#xA;'),
                ('\r', '&#xD;'),
        ):
            if raw in text:
                text = text.replace(raw, escaped)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
2061
2062
2063# --------------------------------------------------------------------
2064
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests).  This must happen before the star import below.
    _Element_Py = Element

    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # The C module is unavailable: keep the pure-Python definitions above.
    pass
else:
    # Runs only when both imports above succeeded.
    _set_factories(Comment, ProcessingInstruction)