blob: c8d898f32816dcae866aa3a077a17133ce21a765 [file] [log] [blame]
Eli Bendersky84fae782013-03-09 07:12:48 -08001"""Lightweight XML support for Python.
2
3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree. This module has two classes for this purpose:
5
6 1. ElementTree represents the whole XML document as a tree and
7
8 2. Element represents a single node in this tree.
9
10 Interactions with the whole document (reading and writing to/from files) are
11 usually done on the ElementTree level. Interactions with a single XML element
12 and its sub-elements are done on the Element level.
13
14 Element is a flexible container object designed to store hierarchical data
15 structures in memory. It can be described as a cross between a list and a
16 dictionary. Each Element has a number of properties associated with it:
17
18 'tag' - a string containing the element's name.
19
20 'attributes' - a Python dictionary storing the element's attributes.
21
22 'text' - a string containing the element's text content.
23
24 'tail' - an optional string containing text after the element's end tag.
25
26 And a number of child elements stored in a Python sequence.
27
28 To create an element instance, use the Element constructor,
29 or the SubElement factory function.
30
31 You can also use the ElementTree class to wrap an element structure
32 and convert it to and from XML.
33
34"""
35
Eli Benderskybf05df22013-04-20 05:44:01 -070036#---------------------------------------------------------------------
37# Licensed to PSF under a Contributor Agreement.
38# See http://www.python.org/psf/license for licensing details.
Armin Rigo9ed73062005-12-14 18:10:45 +000039#
40# ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +000041# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
Armin Rigo9ed73062005-12-14 18:10:45 +000042#
43# fredrik@pythonware.com
44# http://www.pythonware.com
Armin Rigo9ed73062005-12-14 18:10:45 +000045# --------------------------------------------------------------------
46# The ElementTree toolkit is
47#
Florent Xiclunaf15351d2010-03-13 23:24:31 +000048# Copyright (c) 1999-2008 by Fredrik Lundh
Armin Rigo9ed73062005-12-14 18:10:45 +000049#
50# By obtaining, using, and/or copying this software and/or its
51# associated documentation, you agree that you have read, understood,
52# and will comply with the following terms and conditions:
53#
54# Permission to use, copy, modify, and distribute this software and
55# its associated documentation for any purpose and without fee is
56# hereby granted, provided that the above copyright notice appears in
57# all copies, and that both that copyright notice and this permission
58# notice appear in supporting documentation, and that the name of
59# Secret Labs AB or the author not be used in advertising or publicity
60# pertaining to distribution of the software without specific, written
61# prior permission.
62#
63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70# OF THIS SOFTWARE.
71# --------------------------------------------------------------------
72
# Public API: the names made available by a star-import of this module.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "indent",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLPullParser",
    "register_namespace",
    "canonicalize",
    "C14NWriterTarget",
]

# Version string exported as part of the public API (listed in __all__).
VERSION = "1.3.0"
94
Florent Xiclunaf15351d2010-03-13 23:24:31 +000095import sys
96import re
97import warnings
Eli Bendersky00f402b2012-07-15 06:02:22 +030098import io
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +020099import collections
Serhiy Storchaka2e576f52017-04-24 09:05:00 +0300100import collections.abc
Eli Bendersky00f402b2012-07-15 06:02:22 +0300101import contextlib
Armin Rigo9ed73062005-12-14 18:10:45 +0000102
Eli Bendersky27cbb192012-06-15 09:03:19 +0300103from . import ElementPath
Armin Rigo9ed73062005-12-14 18:10:45 +0000104
Armin Rigo9ed73062005-12-14 18:10:45 +0000105
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, code that raises ParseError attaches
    two extra attributes to the instance:
        'code' - the specific exception code
        'position' - the line and column of the error

    """
116
117# --------------------------------------------------------------------
118
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000119
def iselement(element):
    """Return True if *element* looks like an Element.

    The check is duck-typed: any object exposing a 'tag' attribute qualifies.
    """
    try:
        element.tag
    except AttributeError:
        return False
    return True
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000123
Armin Rigo9ed73062005-12-14 18:10:45 +0000124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements. That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name. *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement. This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag. This is either a string or the value None. Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        # The shared default dict is safe here: it is only read, and the
        # instance always gets its own freshly merged copy below.
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        Deprecated: emits a DeprecationWarning and delegates to __copy__().

        """
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this module, matching what __bool__ does.
        warnings.warn(
            "elem.copy() is deprecated. Use copy.copy(elem) instead.",
            DeprecationWarning, stacklevel=2
            )
        return self.__copy__()

    def __copy__(self):
        # makeelement() goes through __init__, which copies the attrib dict;
        # the children themselves are shared with the original.
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the child (or slice of children) at *index*.

        Raises TypeError if any new child is not an Element.

        """
        if isinstance(index, slice):
            # Materialize first: validation iterates the values once and the
            # assignment needs them again, which would otherwise leave a
            # one-shot iterator (e.g. a generator) already exhausted.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is an iterable with zero or more elements.

        Every item is validated before any is added, so a TypeError leaves
        the existing children untouched.

        """
        # Snapshot the iterable so that validating it does not consume a
        # one-shot iterator before the children are actually added.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents. To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found. Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently. *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently. *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(); the result is whatever that call
        returns rather than a separate copy.

        """
        return self.attrib.keys()

    def items(self):
        """Get the element attributes as (name, value) pairs.

        Equivalent to attrib.items(); the result is whatever that call
        returns rather than a separate copy.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included. To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None yield no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
Armin Rigo9ed73062005-12-14 18:10:45 +0000425
def SubElement(parent, tag, attrib={}, **extra):
    """Create a new element and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    The child is built with parent.makeelement(), so it has whatever type
    that method produces. Returns the new child.

    """
    # Merge into a fresh dict; the caller's mapping is left untouched.
    merged = {**attrib, **extra}
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
442
Armin Rigo9ed73062005-12-14 18:10:45 +0000443
def Comment(text=None):
    """Comment element factory.

    Builds a special element whose tag is the Comment function itself;
    the standard serializer writes such an element out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
456
Armin Rigo9ed73062005-12-14 18:10:45 +0000457
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    Builds a special element whose tag is the ProcessingInstruction function
    itself, which the standard serializer writes out as an XML processing
    instruction.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any. When
    *text* is given, the element's text is "<target> <text>".

    """
    node = Element(ProcessingInstruction)
    node.text = target
    if text:
        node.text = node.text + " " + text
    return node

# Short alias for the factory above.
PI = ProcessingInstruction
475
Armin Rigo9ed73062005-12-14 18:10:45 +0000476
class QName:
    """Qualified name wrapper.

    This class can be used to wrap a QName attribute value in order to get
    proper namespace handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparisons and hashing are delegated to the wrapped text, so a QName
    compares equal to another QName or a plain value with the same text.

    """

    def __init__(self, text_or_uri, tag=None):
        # A falsy tag (None or "") leaves text_or_uri untouched.
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    # Each rich comparison unwraps *other* when it is a QName and then
    # compares the underlying text values.

    def __le__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text <= rhs

    def __lt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text < rhs

    def __ge__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text >= rhs

    def __gt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text > rhs

    def __eq__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text == rhs
Armin Rigo9ed73062005-12-14 18:10:45 +0000521
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000522# --------------------------------------------------------------------
523
Armin Rigo9ed73062005-12-14 18:10:45 +0000524
525class ElementTree:
Eli Bendersky84fae782013-03-09 07:12:48 -0800526 """An XML element hierarchy.
Armin Rigo9ed73062005-12-14 18:10:45 +0000527
Eli Bendersky84fae782013-03-09 07:12:48 -0800528 This class also provides support for serialization to and from
529 standard XML.
530
531 *element* is an optional root element node,
532 *file* is an optional file handle or file name of an XML file whose
533 contents will be used to initialize the tree with.
534
535 """
def __init__(self, element=None, file=None):
    """Wrap *element* as the root and optionally load *file*.

    When *file* is truthy it is handed to parse(), which replaces the
    root with the parsed document's root.
    """
    self._root = element  # first node
    if not file:
        return
    self.parse(file)
541
def getroot(self):
    """Return the root element currently held by this tree."""
    root = self._root
    return root
545
def _setroot(self, element):
    """Replace root element of this tree.

    The previous root is dropped and *element* becomes the new one; no
    validation of *element* is performed. Use with care!

    """
    setattr(self, "_root", element)
555
def parse(self, source, parser=None):
    """Load external XML document into element tree.

    *source* is a file name or file object (anything with a read()
    method), *parser* is an optional parser instance that defaults to
    XMLParser.

    ParseError is raised if the parser fails to parse the document.

    Returns the root element of the given source document, which is also
    stored as this tree's root.

    """
    # Only a file opened here is closed here; a caller-supplied file
    # object is left open.
    opened_here = not hasattr(source, "read")
    if opened_here:
        source = open(source, "rb")
    try:
        if parser is None:
            # If no parser was specified, create a default XMLParser
            parser = XMLParser()
            if hasattr(parser, '_parse_whole'):
                # The default XMLParser, when it comes from an accelerator,
                # can define an internal _parse_whole API for efficiency.
                # It can be used to parse the whole source without feeding
                # it with chunks.
                self._root = parser._parse_whole(source)
                return self._root
        # Feed the parser in 64 KiB chunks until the source is exhausted.
        chunk = source.read(65536)
        while chunk:
            parser.feed(chunk)
            chunk = source.read(65536)
        self._root = parser.close()
        return self._root
    finally:
        if opened_here:
            source.close()
Armin Rigo9ed73062005-12-14 18:10:45 +0000592
def iter(self, tag=None):
    """Create and return tree iterator for the root element.

    Delegates to the root element's iter(), which loops over all elements
    in this tree, in document order.

    *tag* is a string with the tag name to iterate over
    (default is to return all elements).

    """
    root = self._root
    return root.iter(tag)
604
def find(self, path, namespaces=None):
    """Find first matching element by tag name or path.

    Same as getroot().find(path), which is Element.find()

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    A *path* starting with "/" is rewritten to start with "./" and a
    FutureWarning is issued.

    Return the first matching element, or None if no element was found.

    """
    if path[:1] == "/":
        path = "." + path
        message = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        ) % path
        warnings.warn(message, FutureWarning, stacklevel=2)
    return self._root.find(path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000626
def findtext(self, path, default=None, namespaces=None):
    """Find text of the first matching element by tag name or path.

    Same as getroot().findtext(path, default, namespaces), which is
    Element.findtext()

    *path* is a string having either an element tag or an XPath,
    *default* is passed through as the value to use when nothing matches,
    *namespaces* is an optional mapping from namespace prefix to full name.

    A *path* starting with "/" is rewritten to start with "./" and a
    FutureWarning is issued.

    Return whatever the root element's findtext() returns.

    """
    absolute = path[:1] == "/"
    if absolute:
        path = "." + path
        template = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        )
        warnings.warn(template % path, FutureWarning, stacklevel=2)
    return self._root.findtext(path, default, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000648
def findall(self, path, namespaces=None):
    """Find all matching subelements by tag name or path.

    Same as getroot().findall(path), which is Element.findall().

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    A *path* starting with "/" is rewritten to start with "./" and a
    FutureWarning is issued.

    Return list containing all matching elements in document order.

    """
    if path[:1] == "/":
        # Make the path relative to the root and tell the caller.
        path = "." + path
        notice = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path
        )
        warnings.warn(notice, FutureWarning, stacklevel=2)
    return self._root.findall(path, namespaces)
670
def iterfind(self, path, namespaces=None):
    """Find all matching subelements by tag name or path.

    Same as getroot().iterfind(path), which is element.iterfind()

    *path* is a string having either an element tag or an XPath,
    *namespaces* is an optional mapping from namespace prefix to full name.

    A *path* starting with "/" is rewritten to start with "./" and a
    FutureWarning is issued.

    Return an iterable yielding all matching elements in document order.

    """
    starts_at_root = path[:1] == "/"
    if starts_at_root:
        path = "." + path
        warning_text = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        ) % path
        warnings.warn(warning_text, FutureWarning, stacklevel=2)
    return self._root.iterfind(path, namespaces)
Armin Rigo9ed73062005-12-14 18:10:45 +0000692
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None, *,
          short_empty_elements=True):
    """Write element tree to a file as XML.

    Arguments:
      *file_or_filename* -- file name or a file object opened for writing

      *encoding* -- the output encoding (default: US-ASCII, or UTF-8 when
                    *method* is "c14n")

      *xml_declaration* -- bool indicating if an XML declaration should be
                           added to the output. If None, an XML declaration
                           is added if encoding IS NOT either of:
                           US-ASCII, UTF-8, or Unicode. A declaration is
                           only ever written for the "xml" method.

      *default_namespace* -- sets the default XML namespace (for "xmlns")

      *method* -- either "xml" (default), "html", "text", or "c14n"

      *short_empty_elements* -- controls the formatting of elements
                                that contain no content. If True (default)
                                they are emitted as a single self-closed
                                tag, otherwise they are emitted as a pair
                                of start/end tags

    Raises ValueError if *method* is not a known serialization method.

    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)
    if not encoding:
        encoding = "utf-8" if method == "c14n" else "us-ascii"
    enc_lower = encoding.lower()
    with _get_writer(file_or_filename, enc_lower) as emit:
        # An explicit xml_declaration wins; None means "decide from the
        # encoding".
        if xml_declaration is None:
            want_declaration = enc_lower not in (
                "utf-8", "us-ascii", "unicode")
        else:
            want_declaration = xml_declaration
        if method == "xml" and want_declaration:
            declared_encoding = encoding
            if enc_lower == "unicode":
                # Retrieve the default encoding for the xml declaration
                import locale
                declared_encoding = locale.getpreferredencoding()
            emit("<?xml version='1.0' encoding='%s'?>\n" % (
                declared_encoding,))
        if method == "text":
            _serialize_text(emit, self._root)
        else:
            qnames, namespaces = _namespaces(self._root, default_namespace)
            serializer = _serialize[method]
            serializer(emit, self._root, qnames, namespaces,
                       short_empty_elements=short_empty_elements)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000750
def write_c14n(self, file):
    """Write the tree to *file* using the "c14n" output method.

    Kept for lxml.etree compatibility; calling write() with
    method="c14n" is the preferred spelling.
    """
    result = self.write(file, method="c14n")
    return result
Armin Rigo9ed73062005-12-14 18:10:45 +0000754
755# --------------------------------------------------------------------
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000756# serialization support
757
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding a text ``write`` callable for the target.

    *file_or_filename* is either an object with a ``write`` attribute or
    something that can be passed to open().  *encoding* is compared against
    the literal "unicode" to decide between writing text as-is and encoding
    it; callers in this file pass it already lower-cased.

    Any file or wrapper created here is closed or detached when the
    ``with`` block ends; a file object supplied by the caller is left open.
    """
    # returns text write method and release all resources after using
    try:
        write = file_or_filename.write
    except AttributeError:
        # file_or_filename is a file name
        if encoding == "unicode":
            # No explicit encoding: open() picks its default text encoding.
            file = open(file_or_filename, "w")
        else:
            # Characters the encoding cannot represent are written as
            # numeric character references instead of raising.
            file = open(file_or_filename, "w", encoding=encoding,
                        errors="xmlcharrefreplace")
        with file:
            yield file.write
    else:
        # file_or_filename is a file-like object
        # encoding determines if it is a text or binary writer
        if encoding == "unicode":
            # use a text writer as is
            yield write
        else:
            # wrap a binary writer with TextIOWrapper
            with contextlib.ExitStack() as stack:
                if isinstance(file_or_filename, io.BufferedIOBase):
                    file = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    file = io.BufferedWriter(file_or_filename)
                    # Keep the original file open when the BufferedWriter is
                    # destroyed
                    stack.callback(file.detach)
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method
                    file = io.BufferedIOBase()
                    file.writable = lambda: True
                    file.write = write
                    try:
                        # TextIOWrapper uses these methods to determine
                        # if BOM (for UTF-16, etc) should be added
                        file.seekable = file_or_filename.seekable
                        file.tell = file_or_filename.tell
                    except AttributeError:
                        # The target offers no seekable()/tell(); leave the
                        # BufferedIOBase defaults in place.
                        pass
                file = io.TextIOWrapper(file,
                                        encoding=encoding,
                                        errors="xmlcharrefreplace",
                                        newline="\n")
                # Keep the original file open when the TextIOWrapper is
                # destroyed
                # (callbacks run in reverse order on exit, so this detach
                # happens before the BufferedWriter detach registered above)
                stack.callback(file.detach)
                yield file.write
809
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000810def _namespaces(elem, default_namespace=None):
811 # identify namespaces used in this tree
812
813 # maps qnames to *encoded* prefix:local names
814 qnames = {None: None}
815
816 # maps uri:s to prefixes
817 namespaces = {}
818 if default_namespace:
819 namespaces[default_namespace] = ""
820
821 def add_qname(qname):
822 # calculate serialized qname representation
823 try:
824 if qname[:1] == "{":
825 uri, tag = qname[1:].rsplit("}", 1)
826 prefix = namespaces.get(uri)
827 if prefix is None:
828 prefix = _namespace_map.get(uri)
829 if prefix is None:
830 prefix = "ns%d" % len(namespaces)
831 if prefix != "xml":
832 namespaces[uri] = prefix
833 if prefix:
834 qnames[qname] = "%s:%s" % (prefix, tag)
835 else:
836 qnames[qname] = tag # default element
837 else:
838 if default_namespace:
839 # FIXME: can this be handled in XML 1.0?
840 raise ValueError(
841 "cannot use non-qualified names with "
842 "default_namespace option"
843 )
844 qnames[qname] = qname
845 except TypeError:
846 _raise_serialization_error(qname)
847
848 # populate qname and namespaces table
Eli Bendersky64d11e62012-06-15 07:42:50 +0300849 for elem in elem.iter():
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000850 tag = elem.tag
Senthil Kumaranec30b3d2010-11-09 02:36:59 +0000851 if isinstance(tag, QName):
852 if tag.text not in qnames:
853 add_qname(tag.text)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000854 elif isinstance(tag, str):
855 if tag not in qnames:
856 add_qname(tag)
857 elif tag is not None and tag is not Comment and tag is not PI:
858 _raise_serialization_error(tag)
859 for key, value in elem.items():
860 if isinstance(key, QName):
861 key = key.text
862 if key not in qnames:
863 add_qname(key)
864 if isinstance(value, QName) and value.text not in qnames:
865 add_qname(value.text)
866 text = elem.text
867 if isinstance(text, QName) and text.text not in qnames:
868 add_qname(text.text)
869 return qnames, namespaces
870
Eli Benderskya9a2ef52013-01-13 06:04:43 -0800871def _serialize_xml(write, elem, qnames, namespaces,
872 short_empty_elements, **kwargs):
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000873 tag = elem.tag
874 text = elem.text
875 if tag is Comment:
876 write("<!--%s-->" % text)
877 elif tag is ProcessingInstruction:
878 write("<?%s?>" % text)
879 else:
880 tag = qnames[tag]
881 if tag is None:
882 if text:
883 write(_escape_cdata(text))
884 for e in elem:
Eli Benderskya9a2ef52013-01-13 06:04:43 -0800885 _serialize_xml(write, e, qnames, None,
886 short_empty_elements=short_empty_elements)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000887 else:
888 write("<" + tag)
889 items = list(elem.items())
890 if items or namespaces:
891 if namespaces:
892 for v, k in sorted(namespaces.items(),
893 key=lambda x: x[1]): # sort on prefix
894 if k:
895 k = ":" + k
896 write(" xmlns%s=\"%s\"" % (
897 k,
898 _escape_attrib(v)
899 ))
Raymond Hettingere3685fd2018-10-28 11:18:22 -0700900 for k, v in items:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000901 if isinstance(k, QName):
902 k = k.text
903 if isinstance(v, QName):
904 v = qnames[v.text]
905 else:
906 v = _escape_attrib(v)
907 write(" %s=\"%s\"" % (qnames[k], v))
Eli Benderskya9a2ef52013-01-13 06:04:43 -0800908 if text or len(elem) or not short_empty_elements:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000909 write(">")
910 if text:
911 write(_escape_cdata(text))
912 for e in elem:
Eli Benderskya9a2ef52013-01-13 06:04:43 -0800913 _serialize_xml(write, e, qnames, None,
914 short_empty_elements=short_empty_elements)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000915 write("</" + tag + ">")
916 else:
917 write(" />")
918 if elem.tail:
919 write(_escape_cdata(elem.tail))
920
# Lower-case names of HTML elements that never get an end tag when
# serialized with the "html" method.  Besides the legacy set this includes
# the HTML5 void elements "embed", "source", "track" and "wbr", which would
# otherwise be written with an invalid closing tag.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param", "source",
              "track", "wbr"}
928
Eli Benderskya9a2ef52013-01-13 06:04:43 -0800929def _serialize_html(write, elem, qnames, namespaces, **kwargs):
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000930 tag = elem.tag
931 text = elem.text
932 if tag is Comment:
933 write("<!--%s-->" % _escape_cdata(text))
934 elif tag is ProcessingInstruction:
935 write("<?%s?>" % _escape_cdata(text))
936 else:
937 tag = qnames[tag]
938 if tag is None:
939 if text:
940 write(_escape_cdata(text))
941 for e in elem:
942 _serialize_html(write, e, qnames, None)
943 else:
944 write("<" + tag)
945 items = list(elem.items())
946 if items or namespaces:
947 if namespaces:
948 for v, k in sorted(namespaces.items(),
949 key=lambda x: x[1]): # sort on prefix
950 if k:
951 k = ":" + k
952 write(" xmlns%s=\"%s\"" % (
953 k,
954 _escape_attrib(v)
955 ))
Serhiy Storchaka3b05ad72018-10-29 19:31:04 +0200956 for k, v in items:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000957 if isinstance(k, QName):
958 k = k.text
959 if isinstance(v, QName):
960 v = qnames[v.text]
961 else:
962 v = _escape_attrib_html(v)
963 # FIXME: handle boolean attributes
964 write(" %s=\"%s\"" % (qnames[k], v))
965 write(">")
Christian Heimes54ad7e32013-07-05 01:39:49 +0200966 ltag = tag.lower()
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000967 if text:
Christian Heimes54ad7e32013-07-05 01:39:49 +0200968 if ltag == "script" or ltag == "style":
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000969 write(text)
970 else:
971 write(_escape_cdata(text))
972 for e in elem:
973 _serialize_html(write, e, qnames, None)
Christian Heimes54ad7e32013-07-05 01:39:49 +0200974 if ltag not in HTML_EMPTY:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000975 write("</" + tag + ">")
976 if elem.tail:
977 write(_escape_cdata(elem.tail))
978
979def _serialize_text(write, elem):
980 for part in elem.itertext():
981 write(part)
982 if elem.tail:
983 write(elem.tail)
984
985_serialize = {
986 "xml": _serialize_xml,
987 "html": _serialize_html,
988 "text": _serialize_text,
989# this optional method is imported at the end of the module
990# "c14n": _serialize_c14n,
991}
Armin Rigo9ed73062005-12-14 18:10:45 +0000992
Armin Rigo9ed73062005-12-14 18:10:45 +0000993
def register_namespace(prefix, uri):
    """Register *prefix* as the preferred prefix for namespace *uri*.

    The registry is global.  Any existing entry that uses either the same
    URI or the same prefix is dropped before the new pair is stored, so
    both stay unique.  Tags and attributes in this namespace will be
    serialized with the prefix if possible.

    ValueError is raised if the prefix has the form "ns<digits>", which
    is reserved for automatically generated prefixes.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting keys first; the dict cannot be resized while
    # it is being iterated.
    conflicting = [
        known_uri
        for known_uri, known_prefix in _namespace_map.items()
        if known_uri == uri or known_prefix == prefix
    ]
    for known_uri in conflicting:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1012
# Global registry mapping namespace URIs to their preferred prefixes,
# pre-populated with "well-known" namespaces.  register_namespace()
# updates it in place.
_namespace_map = dict((
    ("http://www.w3.org/XML/1998/namespace", "xml"),
    ("http://www.w3.org/1999/xhtml", "html"),
    ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"),
    ("http://schemas.xmlsoap.org/wsdl/", "wsdl"),
    # xml schema
    ("http://www.w3.org/2001/XMLSchema", "xs"),
    ("http://www.w3.org/2001/XMLSchema-instance", "xsi"),
    # dublin core
    ("http://purl.org/dc/elements/1.1/", "dc"),
))
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001027
1028def _raise_serialization_error(text):
1029 raise TypeError(
1030 "cannot serialize %r (type %s)" % (text, type(text).__name__)
1031 )
1032
1033def _escape_cdata(text):
1034 # escape character data
1035 try:
1036 # it's worth avoiding do-nothing calls for strings that are
Mike53f7a7c2017-12-14 14:04:53 +03001037 # shorter than 500 characters, or so. assume that's, by far,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001038 # the most common case in most applications.
1039 if "&" in text:
1040 text = text.replace("&", "&amp;")
1041 if "<" in text:
1042 text = text.replace("<", "&lt;")
1043 if ">" in text:
1044 text = text.replace(">", "&gt;")
1045 return text
1046 except (TypeError, AttributeError):
1047 _raise_serialization_error(text)
1048
1049def _escape_attrib(text):
1050 # escape attribute value
1051 try:
1052 if "&" in text:
1053 text = text.replace("&", "&amp;")
1054 if "<" in text:
1055 text = text.replace("<", "&lt;")
1056 if ">" in text:
1057 text = text.replace(">", "&gt;")
1058 if "\"" in text:
1059 text = text.replace("\"", "&quot;")
Raymond Hettinger076366c2016-09-11 23:18:03 -07001060 # The following business with carriage returns is to satisfy
Raymond Hettinger11fa3ff2016-09-11 23:23:24 -07001061 # Section 2.11 of the XML specification, stating that
Raymond Hettinger076366c2016-09-11 23:18:03 -07001062 # CR or CR LN should be replaced with just LN
1063 # http://www.w3.org/TR/REC-xml/#sec-line-ends
1064 if "\r\n" in text:
1065 text = text.replace("\r\n", "\n")
1066 if "\r" in text:
1067 text = text.replace("\r", "\n")
1068 #The following four lines are issue 17582
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001069 if "\n" in text:
1070 text = text.replace("\n", "&#10;")
Raymond Hettinger076366c2016-09-11 23:18:03 -07001071 if "\t" in text:
1072 text = text.replace("\t", "&#09;")
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001073 return text
1074 except (TypeError, AttributeError):
1075 _raise_serialization_error(text)
1076
1077def _escape_attrib_html(text):
1078 # escape attribute value
1079 try:
1080 if "&" in text:
1081 text = text.replace("&", "&amp;")
1082 if ">" in text:
1083 text = text.replace(">", "&gt;")
1084 if "\"" in text:
1085 text = text.replace("\"", "&quot;")
1086 return text
1087 except (TypeError, AttributeError):
1088 _raise_serialization_error(text)
1089
1090# --------------------------------------------------------------------
1091
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Serialize *element*, including all subelements, to a single value.

    If *encoding* is "unicode" a str is returned, otherwise a bytestring
    in the given encoding (US-ASCII when omitted).

    *method* is an optional output method, one of "xml" (default), "html",
    "text" or "c14n".  *xml_declaration*, *default_namespace* and
    *short_empty_elements* are forwarded unchanged to ElementTree.write().

    Returns an (optionally) encoded string containing the XML data.

    """
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    document = ElementTree(element)
    document.write(buffer, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return buffer.getvalue()
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001115
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001116class _ListDataStream(io.BufferedIOBase):
Eli Bendersky84fae782013-03-09 07:12:48 -08001117 """An auxiliary stream accumulating into a list reference."""
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001118 def __init__(self, lst):
1119 self.lst = lst
Eli Benderskyf90fc682012-07-17 15:09:56 +03001120
Eli Bendersky43cc5f22012-07-17 15:09:12 +03001121 def writable(self):
1122 return True
1123
1124 def seekable(self):
1125 return True
1126
1127 def write(self, b):
1128 self.lst.append(b)
1129
1130 def tell(self):
1131 return len(self.lst)
1132
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Serialize *element*, including all subelements, to a list of chunks.

    Takes the same arguments as tostring() and forwards them to
    ElementTree.write(); every chunk the writer produces becomes one
    item of the returned list.
    """
    chunks = []
    sink = _ListDataStream(chunks)
    document = ElementTree(element)
    document.write(sink, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return chunks
Armin Rigo9ed73062005-12-14 18:10:45 +00001144
Armin Rigo9ed73062005-12-14 18:10:45 +00001145
def dump(elem):
    """Write an element tree or element structure to sys.stdout.

    Intended for debugging only.

    *elem* is either an ElementTree or a single Element; a bare element
    is wrapped in an ElementTree first.  The exact output format is
    implementation dependent; in this version it is written as an
    ordinary XML file, followed by a newline unless the root element's
    tail already ends with one.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    ends_with_newline = bool(tail) and tail[-1] == "\n"
    if not ends_with_newline:
        sys.stdout.write("\n")
1163
Stefan Behnelb5d3cee2019-08-23 16:44:25 +02001164
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The (root) element
    itself will not be changed, but the tail text of all elements in its
    subtree will be adapted.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level. Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  A tree whose root has no
    children is left untouched.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        return

    # Reduce the memory consumption by reusing indentation strings.
    # Index 0 holds the indentation of the root itself; deeper levels are
    # appended on demand.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        # Text that is missing or whitespace-only is replaced; real text
        # content is never touched.
        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
1212 _indent_children(tree, 0)
1213
1214
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001215# --------------------------------------------------------------------
1216# parsing
Armin Rigo9ed73062005-12-14 18:10:45 +00001217
Armin Rigo9ed73062005-12-14 18:10:45 +00001218
def parse(source, parser=None):
    """Load an XML document and return it as an ElementTree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    A new, empty ElementTree is created and its parse() method does
    the actual loading.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1231
Armin Rigo9ed73062005-12-14 18:10:45 +00001232
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  The iterator has a
    ``root`` attribute that is None until the input has been read to the
    end, and is then set to the value returned when the parser is closed.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)
    def iterator():
        # Generator body: nothing here runs until the first next() call,
        # by which time `it`, `source` and `close_source` below are bound.
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            # closing the parser can queue further events; drain them
            # before publishing the root
            yield from pullparser.read_events()
            it.root = root
        finally:
            # only close files that this function opened itself
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    # The pull parser was created above, before the file is opened here.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
Armin Rigo9ed73062005-12-14 18:10:45 +00001279
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280
class XMLPullParser:
    """Non-blocking parser: feed it data, then pull the queued events.

    *events* is the sequence of event names to report; when omitted only
    "end" events are reported.
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        if _parser:
            self._parser = _parser
        else:
            self._parser = XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        reported = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, reported)

    def feed(self, data):
        """Feed encoded data to parser.

        A SyntaxError raised by the underlying parser is not propagated
        here; it is queued and re-raised by read_events().  Raises
        ValueError if the parser has already been closed.
        """
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            self._parser.feed(data)
        except SyntaxError as exc:
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        # Dropping the parser marks the stream as finished for feed().
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element.  Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.  A queued exception is raised when
        it is reached.
        """
        pending = self._events_queue
        while pending:
            item = pending.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001332
1333
def XML(text, parser=None):
    """Parse an XML document held in a string and return its root.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser
    (with a TreeBuilder target).

    Returns an Element instance.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    return root
1349
Armin Rigo9ed73062005-12-14 18:10:45 +00001350
def XMLID(text, parser=None):
    """Parse an XML document held in a string and index it by "id".

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser
    (with a TreeBuilder target).

    Returns an (Element, dict) tuple, in which the dict maps each
    non-empty "id" attribute value to the element carrying it.  When an
    id occurs more than once, the last element in document order wins.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        identifier = node.get("id")
        if identifier:
            by_id[identifier] = node
    return root, by_id
1371
# Parse XML document from string constant.  fromstring is an alias for
# XML(): both names refer to the same function object.
fromstring = XML
Armin Rigo9ed73062005-12-14 18:10:45 +00001374
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    *sequence* is a list or other sequence of fragments, fed to the parser
    one after another; *parser* is an optional parser instance, defaulting
    to the standard XMLParser (with a TreeBuilder target).

    Returns an Element instance.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    root = active.close()
    return root
1389
1390# --------------------------------------------------------------------
Armin Rigo9ed73062005-12-14 18:10:45 +00001391
Armin Rigo9ed73062005-12-14 18:10:45 +00001392
1393class TreeBuilder:
Eli Bendersky84fae782013-03-09 07:12:48 -08001394 """Generic element structure builder.
Armin Rigo9ed73062005-12-14 18:10:45 +00001395
Eli Bendersky84fae782013-03-09 07:12:48 -08001396 This builder converts a sequence of start, data, and end method
1397 calls to a well-formed element structure.
1398
1399 You can use this class to build an element structure using a custom XML
1400 parser, or a parser for some other XML-like format.
1401
1402 *element_factory* is an optional element factory which is called
1403 to create new Element instances, as necessary.
1404
Stefan Behnel43851a22019-05-01 21:20:38 +02001405 *comment_factory* is a factory to create comments to be used instead of
1406 the standard factory. If *insert_comments* is false (the default),
1407 comments will not be inserted into the tree.
1408
1409 *pi_factory* is a factory to create processing instructions to be used
1410 instead of the standard factory. If *insert_pis* is false (the default),
1411 processing instructions will not be inserted into the tree.
Eli Bendersky84fae782013-03-09 07:12:48 -08001412 """
Stefan Behnel43851a22019-05-01 21:20:38 +02001413 def __init__(self, element_factory=None, *,
1414 comment_factory=None, pi_factory=None,
1415 insert_comments=False, insert_pis=False):
Armin Rigo9ed73062005-12-14 18:10:45 +00001416 self._data = [] # data collector
1417 self._elem = [] # element stack
1418 self._last = None # last element
Stefan Behnel43851a22019-05-01 21:20:38 +02001419 self._root = None # root element
Armin Rigo9ed73062005-12-14 18:10:45 +00001420 self._tail = None # true if we're after an end tag
Stefan Behnel43851a22019-05-01 21:20:38 +02001421 if comment_factory is None:
1422 comment_factory = Comment
1423 self._comment_factory = comment_factory
1424 self.insert_comments = insert_comments
1425 if pi_factory is None:
1426 pi_factory = ProcessingInstruction
1427 self._pi_factory = pi_factory
1428 self.insert_pis = insert_pis
Armin Rigo9ed73062005-12-14 18:10:45 +00001429 if element_factory is None:
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001430 element_factory = Element
Armin Rigo9ed73062005-12-14 18:10:45 +00001431 self._factory = element_factory
1432
Armin Rigo9ed73062005-12-14 18:10:45 +00001433 def close(self):
Eli Bendersky84fae782013-03-09 07:12:48 -08001434 """Flush builder buffers and return toplevel document Element."""
Armin Rigo9ed73062005-12-14 18:10:45 +00001435 assert len(self._elem) == 0, "missing end tags"
Stefan Behnel43851a22019-05-01 21:20:38 +02001436 assert self._root is not None, "missing toplevel element"
1437 return self._root
Armin Rigo9ed73062005-12-14 18:10:45 +00001438
1439 def _flush(self):
1440 if self._data:
1441 if self._last is not None:
Neal Norwitz9d72bb42007-04-17 08:48:32 +00001442 text = "".join(self._data)
Armin Rigo9ed73062005-12-14 18:10:45 +00001443 if self._tail:
1444 assert self._last.tail is None, "internal error (tail)"
1445 self._last.tail = text
1446 else:
1447 assert self._last.text is None, "internal error (text)"
1448 self._last.text = text
1449 self._data = []
1450
Armin Rigo9ed73062005-12-14 18:10:45 +00001451 def data(self, data):
Eli Bendersky84fae782013-03-09 07:12:48 -08001452 """Add text to current element."""
Armin Rigo9ed73062005-12-14 18:10:45 +00001453 self._data.append(data)
1454
Armin Rigo9ed73062005-12-14 18:10:45 +00001455 def start(self, tag, attrs):
Eli Bendersky84fae782013-03-09 07:12:48 -08001456 """Open new element and return it.
1457
1458 *tag* is the element name, *attrs* is a dict containing element
1459 attributes.
1460
1461 """
Armin Rigo9ed73062005-12-14 18:10:45 +00001462 self._flush()
1463 self._last = elem = self._factory(tag, attrs)
1464 if self._elem:
1465 self._elem[-1].append(elem)
Stefan Behnel43851a22019-05-01 21:20:38 +02001466 elif self._root is None:
1467 self._root = elem
Armin Rigo9ed73062005-12-14 18:10:45 +00001468 self._elem.append(elem)
1469 self._tail = 0
1470 return elem
1471
def end(self, tag):
    """Close the current element and return it.

    *tag* is the element name; it must match the tag of the element
    being closed.
    """
    self._flush()
    closed = self._elem.pop()
    self._last = closed
    assert closed.tag == tag, (
        "end tag mismatch (expected %s, got %s)" % (closed.tag, tag))
    # Text that follows is the tail of the element just closed.
    self._tail = 1
    return closed
1485
def comment(self, text):
    """Build a comment node with the configured comment factory.

    *text* is the text of the comment.  Whether the node is also added
    to the tree is governed by ``self.insert_comments``.
    """
    handle = self._handle_single
    return handle(self._comment_factory, self.insert_comments, text)
1493
def pi(self, target, text=None):
    """Build a processing instruction node with the configured pi factory.

    *target* is the target name of the processing instruction.
    *text* is its data; it defaults to None when omitted.  Whether the
    node is also added to the tree is governed by ``self.insert_pis``.
    """
    handle = self._handle_single
    return handle(self._pi_factory, self.insert_pis, target, text)
1502
1503 def _handle_single(self, factory, insert, *args):
1504 elem = factory(*args)
1505 if insert:
1506 self._flush()
1507 self._last = elem
1508 if self._elem:
1509 self._elem[-1].append(elem)
1510 self._tail = 1
1511 return elem
1512
Armin Rigo9ed73062005-12-14 18:10:45 +00001513
Eli Bendersky84fae782013-03-09 07:12:48 -08001514# also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    Only the callbacks the target actually defines (start, end, start_ns,
    end_ns, data, comment, pi, doctype, close) are wired up.

    """

    def __init__(self, *, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: expat reports qualified names as
        # "uri}local", which _fixname() turns into "{uri}local".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        # Exception class of whichever expat module was imported above;
        # feed() and close() catch exactly this class.
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'start_ns'):
            parser.StartNamespaceDeclHandler = self._start_ns
        if hasattr(target, 'end_ns'):
            parser.EndNamespaceDeclHandler = self._end_ns
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        # None outside a doctype declaration, else the list of tokens
        # collected so far (see _default()).
        self._doctype = None
        # User-supplied replacement text for otherwise undefined entities.
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor).
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Raises ValueError for an event name that is not recognised.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                parser.CommentHandler = handler
            elif event_name == 'pi':
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, carrying over its error
        # code and (line, column) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start_ns(self, prefix, uri):
        # Missing prefix/uri are passed to the target as ''.
        return self.target.start_ns(prefix or '', uri or '')

    def _end_ns(self, prefix):
        return self.target.end_ns(prefix or '')

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Handler for everything without a dedicated callback: undefined
        # entity references and the pieces of a doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                # Build the error from the class captured in __init__ so it
                # works with the pyexpat fallback too and is always caught
                # by the "except self._error" clauses in feed() and close().
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                     self.parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                    if pubid:
                        # strip the surrounding quote characters
                        pubid = pubid[1:-1]
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif hasattr(self, "doctype"):
                    warnings.warn(
                        "The doctype() method of XMLParser is ignored.  "
                        "Define doctype() method on the TreeBuilder target.",
                        RuntimeWarning)

                self._doctype = None

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, False)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse(b"", True) # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001740
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01001741
Stefan Behnele1d5dd62019-05-01 22:34:13 +02001742# --------------------------------------------------------------------
1743# C14N 2.0
1744
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    Input is taken from *xml_data* (an XML string) or, when that is None,
    from *from_file* (a file path or file-like object); at least one of
    them must be given, otherwise ValueError is raised.

    If *out* is provided, it must be a file or file-like object whose
    ``.write()`` method receives the serialised canonical XML as text
    (not bytes); to write to a file, open it in text mode with encoding
    "utf-8".  In that case None is returned.  Without *out*, the output
    is returned as a text string.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect into an in-memory buffer only when the caller gave no sink.
    buffer = None
    if out is None:
        buffer = io.StringIO()
        out = buffer

    writer = C14NWriterTarget(out.write, **options)
    parser = XMLParser(target=writer)

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # from_file cannot be None here because of the check above.
        parse(from_file, parser=parser)

    if buffer is None:
        return None
    return buffer.getvalue()
1773
1774
# Matcher for text that consists solely of "prefix:name", where both parts
# are runs of word characters; returns a match object or None.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
1776
1777
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Bound method: called with an element's attrs to find the
            # qname aware attribute names that are present on it.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        # Arguments of a start tag whose output is deferred until its text
        # content is known (qname aware tags only), else None.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        # Nesting depth inside an excluded tag; 0 when output is enabled.
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        # Yield (uri, prefix) pairs from the innermost stack level outwards.
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        # Turn "prefix:name" into "{uri}name" using the user declared
        # prefixes; raises ValueError for an undeclared prefix.
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        # Return (serialised name, local name, uri) for *qname*, declaring
        # a namespace on the current element if needed.  Raises ValueError
        # when the namespace is not declared in scope.
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            # A prefix already seen at an inner level shadows this one.
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        # Buffer text unless we are inside an excluded tag.
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        # Write out the buffered text, first emitting a deferred start tag
        # if there is one.  Safe to call with an empty buffer.
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # The text was written by _start() as a resolved qname.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        # A deferred start tag must be emitted first as well, so that this
        # declaration is recorded on the right stack level.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Flush for a deferred parent start tag even without buffered text,
        # otherwise the parent tag would never be written.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        # Write the start tag; *qname_text* is text content that looks like
        # a prefixed name and is written with its resolved prefix.
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname aware element has a deferred start tag but no
        # text; it must still be written before the end tag.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the root element is closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Emit a deferred start tag and/or buffered text first so the
            # comment keeps its position in the document.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Same ordering concern as in comment().
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
2029
2030
2031def _escape_cdata_c14n(text):
2032 # escape character data
2033 try:
2034 # it's worth avoiding do-nothing calls for strings that are
2035 # shorter than 500 character, or so. assume that's, by far,
2036 # the most common case in most applications.
2037 if '&' in text:
2038 text = text.replace('&', '&amp;')
2039 if '<' in text:
2040 text = text.replace('<', '&lt;')
2041 if '>' in text:
2042 text = text.replace('>', '&gt;')
2043 if '\r' in text:
2044 text = text.replace('\r', '&#xD;')
2045 return text
2046 except (TypeError, AttributeError):
2047 _raise_serialization_error(text)
2048
2049
2050def _escape_attrib_c14n(text):
2051 # escape attribute value
2052 try:
2053 if '&' in text:
2054 text = text.replace('&', '&amp;')
2055 if '<' in text:
2056 text = text.replace('<', '&lt;')
2057 if '"' in text:
2058 text = text.replace('"', '&quot;')
2059 if '\t' in text:
2060 text = text.replace('\t', '&#x9;')
2061 if '\n' in text:
2062 text = text.replace('\n', '&#xA;')
2063 if '\r' in text:
2064 text = text.replace('\r', '&#xD;')
2065 return text
2066 except (TypeError, AttributeError):
2067 _raise_serialization_error(text)
2068
2069
2070# --------------------------------------------------------------------
2071
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests)
    _Element_Py = Element

    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # No C accelerator available: keep the pure-Python definitions.
    pass
else:
    # Only reached when both imports above succeeded.
    _set_factories(Comment, ProcessingInstruction)