Blame - Lib/xml/etree/ElementTree.py - platform/external/python/cpython3

blob: 641d787dab3e3f0d531000811b8e583ecf41d1e6 [file] [log] [blame]

Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1	#
				2	# ElementTree
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	3	# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	4	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	5	# light-weight XML support for Python 2.3 and later.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	6	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	7	# history (since 1.2.6):
				8	# 2005-11-12 fl added tostringlist/fromstringlist helpers
				9	# 2006-07-05 fl merged in selected changes from the 1.3 sandbox
				10	# 2006-07-05 fl removed support for 2.1 and earlier
				11	# 2007-06-21 fl added deprecation/future warnings
				12	# 2007-08-25 fl added doctype hook, added parser version attribute etc
				13	# 2007-08-26 fl added new serializer code (better namespace handling, etc)
				14	# 2007-08-27 fl warn for broken /tag searches on tree level
				15	# 2007-09-02 fl added html/text methods to serializer (experimental)
				16	# 2007-09-05 fl added method argument to tostring/tostringlist
				17	# 2007-09-06 fl improved error handling
				18	# 2007-09-13 fl added itertext, iterfind; assorted cleanups
				19	# 2007-12-15 fl added C14N hooks, copy method (experimental)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	20	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	21	# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	22	#
				23	# fredrik@pythonware.com
				24	# http://www.pythonware.com
				25	#
				26	# --------------------------------------------------------------------
				27	# The ElementTree toolkit is
				28	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	29	# Copyright (c) 1999-2008 by Fredrik Lundh
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	30	#
				31	# By obtaining, using, and/or copying this software and/or its
				32	# associated documentation, you agree that you have read, understood,
				33	# and will comply with the following terms and conditions:
				34	#
				35	# Permission to use, copy, modify, and distribute this software and
				36	# its associated documentation for any purpose and without fee is
				37	# hereby granted, provided that the above copyright notice appears in
				38	# all copies, and that both that copyright notice and this permission
				39	# notice appear in supporting documentation, and that the name of
				40	# Secret Labs AB or the author not be used in advertising or publicity
				41	# pertaining to distribution of the software without specific, written
				42	# prior permission.
				43	#
				44	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
				45	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
				46	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
				47	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
				48	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
				49	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
				50	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
				51	# OF THIS SOFTWARE.
				52	# --------------------------------------------------------------------
				53
Fredrik Lundh	63168a5	2005-12-14 22:29:34 +0000	[diff] [blame]	54	# Licensed to PSF under a Contributor Agreement.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	55	# See http://www.python.org/psf/license for licensing details.
Fredrik Lundh	63168a5	2005-12-14 22:29:34 +0000	[diff] [blame]	56
# Public symbols exported by ``from <module> import *``.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLTreeBuilder",
    "register_namespace",
]

# Version string; part of the public API (it is listed in __all__ above).
VERSION = "1.3.0"
				77
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	78	##
				79	# The <b>Element</b> type is a flexible container object, designed to
				80	# store hierarchical data structures in memory. The type can be
				81	# described as a cross between a list and a dictionary.
				82	# <p>
				83	# Each element has a number of properties associated with it:
				84	# <ul>
				85	# <li>a <i>tag</i>. This is a string identifying what kind of data
				86	# this element represents (the element type, in other words).</li>
				87	# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
				88	# <li>a <i>text</i> string.</li>
				89	# <li>an optional <i>tail</i> string.</li>
				90	# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
				91	# </ul>
				92	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	93	# To create an element instance, use the {@link #Element} constructor
				94	# or the {@link #SubElement} factory function.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	95	# <p>
				96	# The {@link #ElementTree} class can be used to wrap an element
				97	# structure, and convert it from and to XML.
				98	##
				99
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	100	import sys
				101	import re
				102	import warnings
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	103	import io
				104	import contextlib
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	105
Eli Bendersky	27cbb19	2012-06-15 09:03:19 +0300	[diff] [blame]	106	from . import ElementPath
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	107
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	108
				109	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	110	# Parser error. This is a subclass of <b>SyntaxError</b>.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	111	# <p>
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	112	# In addition to the exception value, an exception instance contains a
				113	# specific exception code in the <b>code</b> attribute, and the line and
				114	# column of the error in the <b>position</b> attribute.
				115
				116	class ParseError(SyntaxError):
				117	pass
				118
				119	# --------------------------------------------------------------------
				120
				121	##
				122	# Checks if an object appears to be a valid element object.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	123	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	124	# @param element An element instance.
				125	# @return A true value if this is an element object.
				126	# @defreturn flag
				127
				128	def iselement(element):
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	129	# FIXME: not sure about this;
				130	# isinstance(element, Element) or look for tag/attrib/text attributes
				131	return hasattr(element, 'tag')
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	132
				133	##
				134	# Element class. This class defines the Element interface, and
				135	# provides a reference implementation of this interface.
				136	# <p>
				137	# The element name, attribute names, and attribute values can be
				138	# either ASCII strings (ordinary Python strings containing only 7-bit
				139	# ASCII characters) or Unicode strings.
				140	#
				141	# @param tag The element name.
				142	# @param attrib An optional dictionary, containing element attributes.
				143	# @param **extra Additional attributes, given as keyword arguments.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	144	# @see Element
				145	# @see SubElement
				146	# @see Comment
				147	# @see ProcessingInstruction
				148
class Element:
    """Reference implementation of the Element interface.

    An element corresponds to ``<tag attrib>text<child/>...</tag>tail``:
    it has a tag, a dictionary of attributes, an optional text string, an
    optional tail string, and an ordered sequence of child elements.

    *tag* is the element name, *attrib* an optional dictionary of element
    attributes, and *extra* holds additional attributes given as keyword
    arguments.
    """

    # (Attribute) Element tag.
    tag = None

    # (Attribute) Element attribute dictionary.  Where possible, use
    # get(), set(), keys() and items() to access element attributes.
    attrib = None

    # (Attribute) Text before the first subelement.  Either a string or
    # None.  If there was no text, this may be None or an empty string,
    # depending on the parser.
    text = None

    # (Attribute) Text after this element's end tag, but before the next
    # sibling element's start tag.  Either a string or None.  If there was
    # no text, this may be None or an empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        Raises TypeError if *attrib* is not a dict.
        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        # Work on a copy so that neither the shared default dict nor the
        # caller's dictionary is mutated by the update below.
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))

    def makeelement(self, tag, attrib):
        """Create a new element object of the same type as this element.

        *tag* is the element tag and *attrib* the element attributes,
        given as a dictionary.  Returns the new element instance.
        """
        return self.__class__(tag, attrib)

    def copy(self):
        """(Experimental) Return a shallow copy of this element.

        Subelements are shared with the original tree.
        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of subelements.

        Only full elements are counted; to check whether an element has
        any content, check both the length and the ``text`` attribute.
        """
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement(s) at *index*.

        Raises IndexError if the given element does not exist.
        """
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index* with *element*.

        Raises IndexError if the given element does not exist.  Unlike
        append(), insert() and extend(), no type check is performed here.
        """
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*.

        Raises IndexError if the given element does not exist.
        """
        del self._children[index]

    def append(self, element):
        """Add *element* to the end of this element's children.

        In document order the new element appears after the last existing
        subelement (or directly after the text, if it is the first
        subelement), but before this element's end tag.

        Raises TypeError if *element* is not an Element.
        """
        self._assert_is_element(element)
        self._children.append(element)

    def extend(self, elements):
        """Append the subelements produced by the iterable *elements*.

        Raises TypeError if any item is not an Element; in that case no
        item is appended.
        """
        # Snapshot the input first.  Validating and then extending from
        # *elements* directly would iterate it twice: a generator would
        # already be exhausted when it is time to append, and extending an
        # element with itself would never terminate.
        new_children = list(elements)
        for element in new_children:
            self._assert_is_element(element)
        self._children.extend(new_children)

    def insert(self, index, element):
        """Insert *element* as a subelement at position *index*.

        Raises TypeError if *element* is not an Element.
        """
        self._assert_is_element(element)
        self._children.insert(index, element)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, element):
        """Remove the matching subelement *element*.

        Delegates to list.remove() on the child list.  To remove
        subelements by other means, the easiest way is often to use a
        list comprehension to select which elements to keep, and then use
        slice assignment to update the parent element.

        Raises ValueError if a matching element could not be found.
        """
        self._children.remove(element)

    def getchildren(self):
        """(Deprecated) Return all subelements, in document order.

        Emits a DeprecationWarning.  The returned list is the element's
        own child list, not a copy.
        """
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find the first matching subelement, by tag name or path.

        *path* says what element to look for; *namespaces* is an optional
        namespace prefix map.  Delegates to ElementPath.find(), which is
        documented to return the first matching element, or None if no
        element was found.
        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for the first matching subelement, by tag name or path.

        *path* says what element to look for, *default* is what to return
        if the element was not found, and *namespaces* is an optional
        namespace prefix map.  Delegates to ElementPath.findtext(), which
        is documented to return the text content of the first matching
        element, *default* if no element was found, and an empty string if
        the element is found but has no text content.
        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        *path* says what element to look for; *namespaces* is an optional
        namespace prefix map.  Delegates to ElementPath.findall(), which
        is documented to return a list or other sequence of all matching
        elements, in document order.
        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        *path* says what element to look for; *namespaces* is an optional
        namespace prefix map.  Delegates to ElementPath.iterfind(), which
        is documented to return an iterator or sequence of all matching
        elements, in document order.
        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset this element.

        Removes all subelements, clears all attributes, and sets the
        ``text`` and ``tail`` attributes to None.
        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Return the value of attribute *key*, or *default* if missing.

        Equivalent to ``attrib.get(key, default)``.
        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set attribute *key* to *value*.

        Equivalent to ``attrib[key] = value``.
        """
        self.attrib[key] = value

    def keys(self):
        """Return the element's attribute names.

        Equivalent to ``attrib.keys()``.
        """
        return self.attrib.keys()

    def items(self):
        """Return the element's attributes as (name, value) pairs.

        Equivalent to ``attrib.items()``.
        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create a tree iterator.

        The iterator loops over this element and all subelements, in
        document order, and yields every element whose tag equals *tag*.
        If *tag* is None or ``"*"``, every element is yielded.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, call
        list() on the iterator and loop over the resulting list.
        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for match in child.iter(tag):
                yield match

    # compatibility
    def getiterator(self, tag=None):
        """Return ``list(self.iter(tag))``; emits a PendingDeprecationWarning."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create a text iterator.

        The iterator loops over this element and all subelements, in
        document order, and yields all inner text.
        """
        tag = self.tag
        # Elements whose tag is neither a string nor None contribute no
        # text; the Comment and ProcessingInstruction factories create such
        # elements (their tag is the factory function itself).
        if not isinstance(tag, str) and tag is not None:
            return
        if self.text:
            yield self.text
        for child in self:
            for chunk in child.itertext():
                yield chunk
            if child.tail:
                yield child.tail

# compatibility
_Element = _ElementInterface = Element
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	499
				500	##
				501	# Subelement factory. This function creates an element instance, and
				502	# appends it to an existing element.
				503	# <p>
				504	# The element name, attribute names, and attribute values can be
				505	# either 8-bit ASCII strings or Unicode strings.
				506	#
				507	# @param parent The parent element.
				508	# @param tag The subelement name.
				509	# @param attrib An optional dictionary, containing element attributes.
				510	# @param **extra Additional attributes, given as keyword arguments.
				511	# @return An element instance.
				512	# @defreturn Element
				513
				514	def SubElement(parent, tag, attrib={}, **extra):
				515	attrib = attrib.copy()
				516	attrib.update(extra)
				517	element = parent.makeelement(tag, attrib)
				518	parent.append(element)
				519	return element
				520
				521	##
				522	# Comment element factory. This factory function creates a special
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	523	# element that will be serialized as an XML comment by the standard
				524	# serializer.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	525	# <p>
				526	# The comment string can be either an 8-bit ASCII string or a Unicode
				527	# string.
				528	#
				529	# @param text A string containing the comment string.
				530	# @return An element instance, representing a comment.
				531	# @defreturn Element
				532
				533	def Comment(text=None):
				534	element = Element(Comment)
				535	element.text = text
				536	return element
				537
				538	##
				539	# PI element factory. This factory function creates a special element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	540	# that will be serialized as an XML processing instruction by the standard
				541	# serializer.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	542	#
				543	# @param target A string containing the PI target.
				544	# @param text A string containing the PI contents, if any.
				545	# @return An element instance, representing a PI.
				546	# @defreturn Element
				547
				548	def ProcessingInstruction(target, text=None):
				549	element = Element(ProcessingInstruction)
				550	element.text = target
				551	if text:
				552	element.text = element.text + " " + text
				553	return element
				554
				555	PI = ProcessingInstruction
				556
				557	##
				558	# QName wrapper. This can be used to wrap a QName attribute value, in
				559	# order to get proper namespace handling on output.
				560	#
				561	# @param text A string containing the QName value, in the form {uri}local,
				562	# or, if the tag argument is given, the URI part of a QName.
				563	# @param tag Optional tag. If given, the first argument is interpreted as
				564	# an URI, and this argument is interpreted as a local name.
				565	# @return An opaque object, representing the QName.
				566
				567	class QName:
				568	def __init__(self, text_or_uri, tag=None):
				569	if tag:
				570	text_or_uri = "{%s}%s" % (text_or_uri, tag)
				571	self.text = text_or_uri
				572	def __str__(self):
				573	return self.text
Georg Brandl	b56c0e2	2010-12-09 18:10:27 +0000	[diff] [blame]	574	def __repr__(self):
Georg Brandl	c95c918	2010-12-09 18:26:02 +0000	[diff] [blame]	575	return '<QName %r>' % (self.text,)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	576	def __hash__(self):
				577	return hash(self.text)
Mark Dickinson	a56c467	2009-01-27 18:17:45 +0000	[diff] [blame]	578	def __le__(self, other):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	579	if isinstance(other, QName):
Mark Dickinson	a56c467	2009-01-27 18:17:45 +0000	[diff] [blame]	580	return self.text <= other.text
				581	return self.text <= other
				582	def __lt__(self, other):
				583	if isinstance(other, QName):
				584	return self.text < other.text
				585	return self.text < other
				586	def __ge__(self, other):
				587	if isinstance(other, QName):
				588	return self.text >= other.text
				589	return self.text >= other
				590	def __gt__(self, other):
				591	if isinstance(other, QName):
				592	return self.text > other.text
				593	return self.text > other
				594	def __eq__(self, other):
				595	if isinstance(other, QName):
				596	return self.text == other.text
				597	return self.text == other
				598	def __ne__(self, other):
				599	if isinstance(other, QName):
				600	return self.text != other.text
				601	return self.text != other
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	602
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	603	# --------------------------------------------------------------------
				604
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	605	##
				606	# ElementTree wrapper class. This class represents an entire element
				607	# hierarchy, and adds some extra support for serialization to and from
				608	# standard XML.
				609	#
				610	# @param element Optional root element.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	611	# @keyparam file Optional file handle or file name. If given, the
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	612	# tree is initialized with the contents of this XML file.
				613
				614	class ElementTree:
				615
				616	def __init__(self, element=None, file=None):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	617	# assert element is None or iselement(element)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	618	self._root = element # first node
				619	if file:
				620	self.parse(file)
				621
				622	##
				623	# Gets the root element for this tree.
				624	#
				625	# @return An element instance.
				626	# @defreturn Element
				627
				628	def getroot(self):
				629	return self._root
				630
				631	##
				632	# Replaces the root element for this tree. This discards the
				633	# current contents of the tree, and replaces it with the given
				634	# element. Use with care.
				635	#
				636	# @param element An element instance.
				637
				638	def _setroot(self, element):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	639	# assert iselement(element)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	640	self._root = element
				641
				642	##
				643	# Loads an external XML document into this element tree.
				644	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	645	# @param source A file name or file object. If a file object is
				646	# given, it only has to implement a <b>read(n)</b> method.
				647	# @keyparam parser An optional parser instance. If not given, the
				648	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	649	# @return The document root element.
				650	# @defreturn Element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	651	# @exception ParseError If the parser fails to parse the document.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	652
				653	def parse(self, source, parser=None):
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	654	close_source = False
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	655	if not hasattr(source, "read"):
				656	source = open(source, "rb")
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	657	close_source = True
				658	try:
				659	if not parser:
				660	parser = XMLParser(target=TreeBuilder())
				661	while 1:
				662	data = source.read(65536)
				663	if not data:
				664	break
				665	parser.feed(data)
				666	self._root = parser.close()
				667	return self._root
				668	finally:
				669	if close_source:
				670	source.close()
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	671
				672	##
				673	# Creates a tree iterator for the root element. The iterator loops
				674	# over all elements in this tree, in document order.
				675	#
				676	# @param tag What tags to look for (default is to return all elements)
				677	# @return An iterator.
				678	# @defreturn iterator
				679
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	680	def iter(self, tag=None):
				681	# assert self._root is not None
				682	return self._root.iter(tag)
				683
				684	# compatibility
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	685	def getiterator(self, tag=None):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	686	# Change for a DeprecationWarning in 1.4
				687	warnings.warn(
				688	"This method will be removed in future versions. "
				689	"Use 'tree.iter()' or 'list(tree.iter())' instead.",
				690	PendingDeprecationWarning, stacklevel=2
				691	)
				692	return list(self.iter(tag))
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	693
				694	##
				695	# Finds the first toplevel element with given tag.
				696	# Same as getroot().find(path).
				697	#
				698	# @param path What element to look for.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	699	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	700	# @return The first matching element, or None if no element was found.
				701	# @defreturn Element or None
				702
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	703	def find(self, path, namespaces=None):
				704	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	705	if path[:1] == "/":
				706	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	707	warnings.warn(
				708	"This search is broken in 1.3 and earlier, and will be "
				709	"fixed in a future version. If you rely on the current "
				710	"behaviour, change it to %r" % path,
				711	FutureWarning, stacklevel=2
				712	)
				713	return self._root.find(path, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	714
				715	##
				716	# Finds the element text for the first toplevel element with given
				717	# tag. Same as getroot().findtext(path).
				718	#
				719	# @param path What toplevel element to look for.
				720	# @param default What to return if the element was not found.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	721	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	722	# @return The text content of the first matching element, or the
				723	# default value no element was found. Note that if the element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	724	# is found, but has no text content, this method returns an
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	725	# empty string.
				726	# @defreturn string
				727
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	728	def findtext(self, path, default=None, namespaces=None):
				729	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	730	if path[:1] == "/":
				731	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	732	warnings.warn(
				733	"This search is broken in 1.3 and earlier, and will be "
				734	"fixed in a future version. If you rely on the current "
				735	"behaviour, change it to %r" % path,
				736	FutureWarning, stacklevel=2
				737	)
				738	return self._root.findtext(path, default, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	739
				740	##
				741	# Finds all toplevel elements with the given tag.
				742	# Same as getroot().findall(path).
				743	#
				744	# @param path What element to look for.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	745	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	746	# @return A list or iterator containing all matching elements,
				747	# in document order.
				748	# @defreturn list of Element instances
				749
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	750	def findall(self, path, namespaces=None):
				751	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	752	if path[:1] == "/":
				753	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	754	warnings.warn(
				755	"This search is broken in 1.3 and earlier, and will be "
				756	"fixed in a future version. If you rely on the current "
				757	"behaviour, change it to %r" % path,
				758	FutureWarning, stacklevel=2
				759	)
				760	return self._root.findall(path, namespaces)
				761
				762	##
				763	# Finds all matching subelements, by tag name or path.
				764	# Same as getroot().iterfind(path).
				765	#
				766	# @param path What element to look for.
				767	# @keyparam namespaces Optional namespace prefix map.
				768	# @return An iterator or sequence containing all matching elements,
				769	# in document order.
				770	# @defreturn a generated sequence of Element instances
				771
				772	def iterfind(self, path, namespaces=None):
				773	# assert self._root is not None
				774	if path[:1] == "/":
				775	path = "." + path
				776	warnings.warn(
				777	"This search is broken in 1.3 and earlier, and will be "
				778	"fixed in a future version. If you rely on the current "
				779	"behaviour, change it to %r" % path,
				780	FutureWarning, stacklevel=2
				781	)
				782	return self._root.iterfind(path, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	783
				784	##
				785	# Writes the element tree to a file, as XML.
				786	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	787	# @def write(file, **options)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	788	# @param file A file name, or a file object opened for writing.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	789	# @param **options Options, given as keyword arguments.
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	790	# @keyparam encoding Optional output encoding (default is US-ASCII).
				791	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	792	# @keyparam xml_declaration Controls if an XML declaration should
				793	# be added to the file. Use False for never, True for always,
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	794	# None for only if not US-ASCII or UTF-8 or Unicode. None is default.
Serhiy Storchaka	03530b9	2013-01-13 21:58:04 +0200	[diff] [blame]	795	# @keyparam default_namespace Sets the default XML namespace (for "xmlns").
				796	# @keyparam method Optional output method ("xml", "html", "text" or
				797	# "c14n"; default is "xml").
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	798
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	799	def write(self, file_or_filename,
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	800	encoding=None,
				801	xml_declaration=None,
				802	default_namespace=None,
				803	method=None):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	804	if not method:
				805	method = "xml"
				806	elif method not in _serialize:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	807	raise ValueError("unknown method %r" % method)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	808	if not encoding:
				809	if method == "c14n":
				810	encoding = "utf-8"
				811	else:
				812	encoding = "us-ascii"
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	813	else:
				814	encoding = encoding.lower()
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	815	with _get_writer(file_or_filename, encoding) as write:
				816	if method == "xml" and (xml_declaration or
				817	(xml_declaration is None and
				818	encoding not in ("utf-8", "us-ascii", "unicode"))):
				819	declared_encoding = encoding
				820	if encoding == "unicode":
				821	# Retrieve the default encoding for the xml declaration
				822	import locale
				823	declared_encoding = locale.getpreferredencoding()
				824	write("<?xml version='1.0' encoding='%s'?>\n" % (
				825	declared_encoding,))
				826	if method == "text":
				827	_serialize_text(write, self._root)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	828	else:
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	829	qnames, namespaces = _namespaces(self._root, default_namespace)
				830	serialize = _serialize[method]
				831	serialize(write, self._root, qnames, namespaces)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	832
				833	def write_c14n(self, file):
				834	# lxml.etree compatibility. use output method instead
				835	return self.write(file, method="c14n")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	836
				837	# --------------------------------------------------------------------
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	838	# serialization support
				839
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	840	@contextlib.contextmanager
				841	def _get_writer(file_or_filename, encoding):
				842	# returns text write method and release all resourses after using
				843	try:
				844	write = file_or_filename.write
				845	except AttributeError:
				846	# file_or_filename is a file name
				847	if encoding == "unicode":
				848	file = open(file_or_filename, "w")
				849	else:
				850	file = open(file_or_filename, "w", encoding=encoding,
				851	errors="xmlcharrefreplace")
				852	with file:
				853	yield file.write
				854	else:
				855	# file_or_filename is a file-like object
				856	# encoding determines if it is a text or binary writer
				857	if encoding == "unicode":
				858	# use a text writer as is
				859	yield write
				860	else:
				861	# wrap a binary writer with TextIOWrapper
				862	with contextlib.ExitStack() as stack:
				863	if isinstance(file_or_filename, io.BufferedIOBase):
				864	file = file_or_filename
				865	elif isinstance(file_or_filename, io.RawIOBase):
				866	file = io.BufferedWriter(file_or_filename)
				867	# Keep the original file open when the BufferedWriter is
				868	# destroyed
				869	stack.callback(file.detach)
				870	else:
				871	# This is to handle passed objects that aren't in the
				872	# IOBase hierarchy, but just have a write method
				873	file = io.BufferedIOBase()
				874	file.writable = lambda: True
				875	file.write = write
				876	try:
				877	# TextIOWrapper uses this methods to determine
				878	# if BOM (for UTF-16, etc) should be added
				879	file.seekable = file_or_filename.seekable
				880	file.tell = file_or_filename.tell
				881	except AttributeError:
				882	pass
				883	file = io.TextIOWrapper(file,
				884	encoding=encoding,
				885	errors="xmlcharrefreplace",
				886	newline="\n")
				887	# Keep the original file open when the TextIOWrapper is
				888	# destroyed
				889	stack.callback(file.detach)
				890	yield file.write
				891
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks ``elem.iter()`` and returns ``(qnames, namespaces)``:
    *qnames* maps each name seen (tag, attribute key, QName value/text) to
    its serialized "prefix:local" form, and *namespaces* maps namespace
    URIs to prefixes.  *default_namespace*, when given, is mapped to the
    empty prefix.

    Raises ValueError for a non-qualified name when *default_namespace*
    is set.
    """
    # maps qnames to encoded prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # Not used in this tree yet: take the registered prefix or
                # make up an "nsN" one.
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                qnames[qname] = "%s:%s" % (prefix, local)
            else:
                qnames[qname] = local  # default element
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, str):
            if tag not in qnames:
                add_qname(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
				952
				953	def _serialize_xml(write, elem, qnames, namespaces):
				954	tag = elem.tag
				955	text = elem.text
				956	if tag is Comment:
				957	write("<!--%s-->" % text)
				958	elif tag is ProcessingInstruction:
				959	write("<?%s?>" % text)
				960	else:
				961	tag = qnames[tag]
				962	if tag is None:
				963	if text:
				964	write(_escape_cdata(text))
				965	for e in elem:
				966	_serialize_xml(write, e, qnames, None)
				967	else:
				968	write("<" + tag)
				969	items = list(elem.items())
				970	if items or namespaces:
				971	if namespaces:
				972	for v, k in sorted(namespaces.items(),
				973	key=lambda x: x[1]): # sort on prefix
				974	if k:
				975	k = ":" + k
				976	write(" xmlns%s=\"%s\"" % (
				977	k,
				978	_escape_attrib(v)
				979	))
				980	for k, v in sorted(items): # lexical order
				981	if isinstance(k, QName):
				982	k = k.text
				983	if isinstance(v, QName):
				984	v = qnames[v.text]
				985	else:
				986	v = _escape_attrib(v)
				987	write(" %s=\"%s\"" % (qnames[k], v))
				988	if text or len(elem):
				989	write(">")
				990	if text:
				991	write(_escape_cdata(text))
				992	for e in elem:
				993	_serialize_xml(write, e, qnames, None)
				994	write("</" + tag + ">")
				995	else:
				996	write(" />")
				997	if elem.tail:
				998	write(_escape_cdata(elem.tail))
				999
				1000	HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
Ezio Melotti	c90111f	2012-09-19 08:19:12 +0300	[diff] [blame]	1001	"img", "input", "isindex", "link", "meta", "param")
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1002
				1003	try:
				1004	HTML_EMPTY = set(HTML_EMPTY)
				1005	except NameError:
				1006	pass
				1007
				1008	def _serialize_html(write, elem, qnames, namespaces):
				1009	tag = elem.tag
				1010	text = elem.text
				1011	if tag is Comment:
				1012	write("<!--%s-->" % _escape_cdata(text))
				1013	elif tag is ProcessingInstruction:
				1014	write("<?%s?>" % _escape_cdata(text))
				1015	else:
				1016	tag = qnames[tag]
				1017	if tag is None:
				1018	if text:
				1019	write(_escape_cdata(text))
				1020	for e in elem:
				1021	_serialize_html(write, e, qnames, None)
				1022	else:
				1023	write("<" + tag)
				1024	items = list(elem.items())
				1025	if items or namespaces:
				1026	if namespaces:
				1027	for v, k in sorted(namespaces.items(),
				1028	key=lambda x: x[1]): # sort on prefix
				1029	if k:
				1030	k = ":" + k
				1031	write(" xmlns%s=\"%s\"" % (
				1032	k,
				1033	_escape_attrib(v)
				1034	))
				1035	for k, v in sorted(items): # lexical order
				1036	if isinstance(k, QName):
				1037	k = k.text
				1038	if isinstance(v, QName):
				1039	v = qnames[v.text]
				1040	else:
				1041	v = _escape_attrib_html(v)
				1042	# FIXME: handle boolean attributes
				1043	write(" %s=\"%s\"" % (qnames[k], v))
				1044	write(">")
				1045	tag = tag.lower()
				1046	if text:
				1047	if tag == "script" or tag == "style":
				1048	write(text)
				1049	else:
				1050	write(_escape_cdata(text))
				1051	for e in elem:
				1052	_serialize_html(write, e, qnames, None)
				1053	if tag not in HTML_EMPTY:
				1054	write("</" + tag + ">")
				1055	if elem.tail:
				1056	write(_escape_cdata(elem.tail))
				1057
				1058	def _serialize_text(write, elem):
				1059	for part in elem.itertext():
				1060	write(part)
				1061	if elem.tail:
				1062	write(elem.tail)
				1063
				1064	_serialize = {
				1065	"xml": _serialize_xml,
				1066	"html": _serialize_html,
				1067	"text": _serialize_text,
				1068	# this optional method is imported at the end of the module
				1069	# "c14n": _serialize_c14n,
				1070	}
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1071
				1072	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1073	# Registers a namespace prefix. The registry is global, and any
				1074	# existing mapping for either the given prefix or the namespace URI
				1075	# will be removed.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1076	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1077	# @param prefix Namespace prefix.
				1078	# @param uri Namespace uri. Tags and attributes in this namespace
				1079	# will be serialized with the given prefix, if at all possible.
				1080	# @exception ValueError If the prefix is reserved, or is otherwise
				1081	# invalid.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1082
def register_namespace(prefix, uri):
    """Register *prefix* for the namespace *uri* in the global registry.

    Any existing mapping for either the given prefix or the given URI is
    removed first.

    Raises ValueError if *prefix* has the form "ns<digits>", which is
    reserved for internal use.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a copy because entries are deleted during the scan.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1105
				1106	def _raise_serialization_error(text):
				1107	raise TypeError(
				1108	"cannot serialize %r (type %s)" % (text, type(text).__name__)
				1109	)
				1110
				1111	def _escape_cdata(text):
				1112	# escape character data
				1113	try:
				1114	# it's worth avoiding do-nothing calls for strings that are
				1115	# shorter than 500 character, or so. assume that's, by far,
				1116	# the most common case in most applications.
				1117	if "&" in text:
				1118	text = text.replace("&", "&")
				1119	if "<" in text:
				1120	text = text.replace("<", "<")
				1121	if ">" in text:
				1122	text = text.replace(">", ">")
				1123	return text
				1124	except (TypeError, AttributeError):
				1125	_raise_serialization_error(text)
				1126
				1127	def _escape_attrib(text):
				1128	# escape attribute value
				1129	try:
				1130	if "&" in text:
				1131	text = text.replace("&", "&")
				1132	if "<" in text:
				1133	text = text.replace("<", "<")
				1134	if ">" in text:
				1135	text = text.replace(">", ">")
				1136	if "\"" in text:
				1137	text = text.replace("\"", """)
				1138	if "\n" in text:
				1139	text = text.replace("\n", " ")
				1140	return text
				1141	except (TypeError, AttributeError):
				1142	_raise_serialization_error(text)
				1143
				1144	def _escape_attrib_html(text):
				1145	# escape attribute value
				1146	try:
				1147	if "&" in text:
				1148	text = text.replace("&", "&")
				1149	if ">" in text:
				1150	text = text.replace(">", ">")
				1151	if "\"" in text:
				1152	text = text.replace("\"", """)
				1153	return text
				1154	except (TypeError, AttributeError):
				1155	_raise_serialization_error(text)
				1156
				1157	# --------------------------------------------------------------------
				1158
				1159	##
				1160	# Generates a string representation of an XML element, including all
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1161	# subelements. If encoding is "unicode", the return type is a string;
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1162	# otherwise it is a bytes array.
				1163	#
				1164	# @param element An Element instance.
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1165	# @keyparam encoding Optional output encoding (default is US-ASCII).
				1166	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1167	# @keyparam method Optional output method ("xml", "html", "text" or
				1168	# "c14n"; default is "xml").
				1169	# @return An (optionally) encoded string containing the XML data.
				1170	# @defreturn string
				1171
				1172	def tostring(element, encoding=None, method=None):
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	1173	stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
				1174	ElementTree(element).write(stream, encoding, method=method)
				1175	return stream.getvalue()
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1176
				1177	##
				1178	# Generates a string representation of an XML element, including all
Eli Bendersky	00f402b	2012-07-15 06:02:22 +0300	[diff] [blame]	1179	# subelements.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1180	#
				1181	# @param element An Element instance.
				1182	# @keyparam encoding Optional output encoding (default is US-ASCII).
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1183	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1184	# @keyparam method Optional output method ("xml", "html", "text" or
				1185	# "c14n"; default is "xml").
				1186	# @return A sequence object containing the XML data.
				1187	# @defreturn sequence
				1188	# @since 1.3
				1189
Eli Bendersky	43cc5f2	2012-07-17 15:09:12 +0300	[diff] [blame]	1190	class _ListDataStream(io.BufferedIOBase):
				1191	""" An auxiliary stream accumulating into a list reference
				1192	"""
				1193	def __init__(self, lst):
				1194	self.lst = lst
Eli Bendersky	f90fc68	2012-07-17 15:09:56 +0300	[diff] [blame]	1195
Eli Bendersky	43cc5f2	2012-07-17 15:09:12 +0300	[diff] [blame]	1196	def writable(self):
				1197	return True
				1198
				1199	def seekable(self):
				1200	return True
				1201
				1202	def write(self, b):
				1203	self.lst.append(b)
				1204
				1205	def tell(self):
				1206	return len(self.lst)
				1207
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and return the list of chunks written.

    *encoding* and *method* are passed on to ``ElementTree.write``.
    """
    chunks = []
    sink = _ListDataStream(chunks)
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    return chunks
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1213
				1214	##
				1215	# Writes an element tree or element structure to sys.stdout. This
				1216	# function should be used for debugging only.
				1217	# <p>
				1218	# The exact output format is implementation dependent. In this
				1219	# version, it's written as an ordinary XML file.
				1220	#
				1221	# @param elem An element tree or an individual element.
				1222
				1223	def dump(elem):
				1224	# debugging
				1225	if not isinstance(elem, ElementTree):
				1226	elem = ElementTree(elem)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1227	elem.write(sys.stdout, encoding="unicode")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1228	tail = elem.getroot().tail
				1229	if not tail or tail[-1] != "\n":
				1230	sys.stdout.write("\n")
				1231
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1232	# --------------------------------------------------------------------
				1233	# parsing
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1234
				1235	##
				1236	# Parses an XML document into an element tree.
				1237	#
				1238	# @param source A filename or file object containing XML data.
				1239	# @param parser An optional parser instance. If not given, the
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1240	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1241	# @return An ElementTree instance
				1242
				1243	def parse(source, parser=None):
				1244	tree = ElementTree()
				1245	tree.parse(source, parser)
				1246	return tree
				1247
				1248	##
				1249	# Parses an XML document into an element tree incrementally, and reports
				1250	# what's going on to the user.
				1251	#
				1252	# @param source A filename or file object containing XML data.
				1253	# @param events A list of events to report back. If omitted, only "end"
				1254	# events are reported.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1255	# @param parser An optional parser instance. If not given, the
				1256	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1257	# @return A (event, elem) iterator.
				1258
def iterparse(source, events=None, parser=None):
    """Return an iterator that parses *source* incrementally.

    *source* is a file name or an object with a ``read`` attribute.
    *events* is the list of events to report; it is passed on to the
    iterator, which defaults to ["end"].
    *parser* is an optional parser instance; when it is missing (or falsy)
    an XMLParser feeding a TreeBuilder is created.

    A file opened here is owned by the returned iterator.  If setting up
    the parser or the iterator fails, the file is closed before the
    exception propagates.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except BaseException:
        # Nobody else holds the file yet (e.g. an unknown event name was
        # rejected), so release it here instead of leaking it.
        if close_source:
            source.close()
        raise
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1267
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1268	class _IterParseIterator:
				1269
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	1270	def __init__(self, source, events, parser, close_source=False):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1271	self._file = source
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	1272	self._close_file = close_source
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1273	self._events = []
				1274	self._index = 0
Florent Xicluna	91d5193	2011-11-01 23:31:09 +0100	[diff] [blame]	1275	self._error = None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1276	self.root = self._root = None
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1277	self._parser = parser
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1278	# wire up the parser for event reporting
				1279	parser = self._parser._parser
				1280	append = self._events.append
				1281	if events is None:
				1282	events = ["end"]
				1283	for event in events:
				1284	if event == "start":
				1285	try:
				1286	parser.ordered_attributes = 1
				1287	parser.specified_attributes = 1
				1288	def handler(tag, attrib_in, event=event, append=append,
				1289	start=self._parser._start_list):
				1290	append((event, start(tag, attrib_in)))
				1291	parser.StartElementHandler = handler
				1292	except AttributeError:
				1293	def handler(tag, attrib_in, event=event, append=append,
				1294	start=self._parser._start):
				1295	append((event, start(tag, attrib_in)))
				1296	parser.StartElementHandler = handler
				1297	elif event == "end":
				1298	def handler(tag, event=event, append=append,
				1299	end=self._parser._end):
				1300	append((event, end(tag)))
				1301	parser.EndElementHandler = handler
				1302	elif event == "start-ns":
				1303	def handler(prefix, uri, event=event, append=append):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1304	append((event, (prefix or "", uri or "")))
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1305	parser.StartNamespaceDeclHandler = handler
				1306	elif event == "end-ns":
				1307	def handler(prefix, event=event, append=append):
				1308	append((event, None))
				1309	parser.EndNamespaceDeclHandler = handler
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1310	else:
				1311	raise ValueError("unknown event %r" % event)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1312
def __next__(self):
    # Return the next buffered item from self._events.  When the buffer is
    # exhausted, refill it by feeding another chunk of self._file to the
    # parser; when the file is exhausted, close the parser, publish the
    # root and stop.
    while True:
        try:
            item = self._events[self._index]
        except IndexError:
            pass  # buffer drained; fall through and refill it
        else:
            self._index += 1
            return item
        # A parse error recorded during the previous refill is raised only
        # now, after every item buffered before it has been handed out.
        pending = self._error
        if pending:
            self._error = None
            raise pending
        if self._parser is None:
            # The parser was closed on an earlier pass: iteration is over.
            self.root = self._root
            if self._close_file:
                self._file.close()
            raise StopIteration
        # load event buffer
        del self._events[:]
        self._index = 0
        chunk = self._file.read(16384)
        if not chunk:
            # End of input: closing the parser yields the root.
            self._root = self._parser.close()
            self._parser = None
            continue
        try:
            self._parser.feed(chunk)
        except SyntaxError as exc:
            self._error = exc
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1342
def __iter__(self):
    # The object acts as its own iterator; items come from __next__.
    return self
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1345
				1346	##
				1347	# Parses an XML document from a string constant. This function can
				1348	# be used to embed "XML literals" in Python code.
				1349	#
				1350	# @param source A string containing XML data.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1351	# @param parser An optional parser instance. If not given, the
				1352	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1353	# @return An Element instance.
				1354	# @defreturn Element
				1355
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1356	def XML(text, parser=None):
				1357	if not parser:
				1358	parser = XMLParser(target=TreeBuilder())
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1359	parser.feed(text)
				1360	return parser.close()
				1361
				1362	##
				1363	# Parses an XML document from a string constant, and also returns
				1364	# a dictionary which maps from element id:s to elements.
				1365	#
				1366	# @param source A string containing XML data.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1367	# @param parser An optional parser instance. If not given, the
				1368	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1369	# @return A tuple containing an Element instance and a dictionary.
				1370	# @defreturn (Element, dictionary)
				1371
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1372	def XMLID(text, parser=None):
				1373	if not parser:
				1374	parser = XMLParser(target=TreeBuilder())
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1375	parser.feed(text)
				1376	tree = parser.close()
				1377	ids = {}
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1378	for elem in tree.iter():
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1379	id = elem.get("id")
				1380	if id:
				1381	ids[id] = elem
				1382	return tree, ids
				1383
##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element

# fromstring is an alias: both names are bound to the same function object.
fromstring = XML
				1393
				1394	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1395	# Parses an XML document from a sequence of string fragments.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1396	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1397	# @param sequence A list or other sequence containing XML data fragments.
				1398	# @param parser An optional parser instance. If not given, the
				1399	# standard {@link XMLParser} parser is used.
				1400	# @return An Element instance.
				1401	# @defreturn Element
				1402	# @since 1.3
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1403
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1404	def fromstringlist(sequence, parser=None):
				1405	if not parser:
				1406	parser = XMLParser(target=TreeBuilder())
				1407	for text in sequence:
				1408	parser.feed(text)
				1409	return parser.close()
				1410
				1411	# --------------------------------------------------------------------
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1412
				1413	##
				1414	# Generic element structure builder. This builder converts a sequence
				1415	# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
				1416	# #TreeBuilder.end} method calls to a well-formed element structure.
				1417	# <p>
				1418	# You can use this class to build an element structure using a custom XML
				1419	# parser, or a parser for some other XML-like format.
				1420	#
				1421	# @param element_factory Optional element factory. This factory
				1422	# is called to create new Element instances, as necessary.
				1423
				1424	class TreeBuilder:
				1425
				1426	def __init__(self, element_factory=None):
				1427	self._data = [] # data collector
				1428	self._elem = [] # element stack
				1429	self._last = None # last element
				1430	self._tail = None # true if we're after an end tag
				1431	if element_factory is None:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1432	element_factory = Element
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1433	self._factory = element_factory
				1434
				1435	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1436	# Flushes the builder buffers, and returns the toplevel document
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1437	# element.
				1438	#
				1439	# @return An Element instance.
				1440	# @defreturn Element
				1441
				1442	def close(self):
				1443	assert len(self._elem) == 0, "missing end tags"
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1444	assert self._last is not None, "missing toplevel element"
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1445	return self._last
				1446
				1447	def _flush(self):
				1448	if self._data:
				1449	if self._last is not None:
Neal Norwitz	9d72bb4	2007-04-17 08:48:32 +0000	[diff] [blame]	1450	text = "".join(self._data)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1451	if self._tail:
				1452	assert self._last.tail is None, "internal error (tail)"
				1453	self._last.tail = text
				1454	else:
				1455	assert self._last.text is None, "internal error (text)"
				1456	self._last.text = text
				1457	self._data = []
				1458
				1459	##
				1460	# Adds text to the current element.
				1461	#
				1462	# @param data A string. This should be either an 8-bit string
				1463	# containing ASCII text, or a Unicode string.
				1464
				1465	def data(self, data):
				1466	self._data.append(data)
				1467
				1468	##
				1469	# Opens a new element.
				1470	#
				1471	# @param tag The element name.
				1472	# @param attrib A dictionary containing element attributes.
				1473	# @return The opened element.
				1474	# @defreturn Element
				1475
				1476	def start(self, tag, attrs):
				1477	self._flush()
				1478	self._last = elem = self._factory(tag, attrs)
				1479	if self._elem:
				1480	self._elem[-1].append(elem)
				1481	self._elem.append(elem)
				1482	self._tail = 0
				1483	return elem
				1484
				1485	##
				1486	# Closes the current element.
				1487	#
				1488	# @param tag The element name.
				1489	# @return The closed element.
				1490	# @defreturn Element
				1491
				1492	def end(self, tag):
				1493	self._flush()
				1494	self._last = self._elem.pop()
				1495	assert self._last.tag == tag,\
				1496	"end tag mismatch (expected %s, got %s)" % (
				1497	self._last.tag, tag)
				1498	self._tail = 1
				1499	return self._last
				1500
				1501	##
				1502	# Element structure builder for XML source data, based on the
				1503	# <b>expat</b> parser.
				1504	#
				1505	# @keyparam target Target object. If omitted, the builder uses an
				1506	# instance of the standard {@link #TreeBuilder} class.
				1507	# @keyparam html Predefine HTML entities. This flag is not supported
				1508	# by the current implementation.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1509	# @keyparam encoding Optional encoding. If given, the value overrides
				1510	# the encoding specified in the XML file.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1511	# @see #ElementTree
				1512	# @see #TreeBuilder
				1513
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1514	class XMLParser:
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1515
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1516	def __init__(self, html=0, target=None, encoding=None):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1517	try:
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1518	from xml.parsers import expat
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1519	except ImportError:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1520	try:
				1521	import pyexpat as expat
				1522	except ImportError:
				1523	raise ImportError(
				1524	"No module named expat; use SimpleXMLTreeBuilder instead"
				1525	)
				1526	parser = expat.ParserCreate(encoding, "}")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1527	if target is None:
				1528	target = TreeBuilder()
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1529	# underscored names are provided for compatibility only
				1530	self.parser = self._parser = parser
				1531	self.target = self._target = target
				1532	self._error = expat.error
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1533	self._names = {} # name memo cache
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1534	# main callbacks
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1535	parser.DefaultHandlerExpand = self._default
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1536	if hasattr(target, 'start'):
				1537	parser.StartElementHandler = self._start
				1538	if hasattr(target, 'end'):
				1539	parser.EndElementHandler = self._end
				1540	if hasattr(target, 'data'):
				1541	parser.CharacterDataHandler = target.data
				1542	# miscellaneous callbacks
				1543	if hasattr(target, 'comment'):
				1544	parser.CommentHandler = target.comment
				1545	if hasattr(target, 'pi'):
				1546	parser.ProcessingInstructionHandler = target.pi
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1547	# let expat do the buffering, if supported
				1548	try:
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1549	parser.buffer_text = 1
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1550	except AttributeError:
				1551	pass
				1552	# use new-style attribute handling, if supported
				1553	try:
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1554	parser.ordered_attributes = 1
				1555	parser.specified_attributes = 1
				1556	if hasattr(target, 'start'):
				1557	parser.StartElementHandler = self._start_list
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1558	except AttributeError:
				1559	pass
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1560	self._doctype = None
				1561	self.entity = {}
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1562	try:
				1563	self.version = "Expat %d.%d.%d" % expat.version_info
				1564	except AttributeError:
				1565	pass # unknown
				1566
				1567	def _raiseerror(self, value):
				1568	err = ParseError(value)
				1569	err.code = value.code
				1570	err.position = value.lineno, value.offset
				1571	raise err
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1572
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1573	def _fixname(self, key):
				1574	# expand qname, and convert name string to ascii, if possible
				1575	try:
				1576	name = self._names[key]
				1577	except KeyError:
				1578	name = key
				1579	if "}" in name:
				1580	name = "{" + name
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1581	self._names[key] = name
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1582	return name
				1583
				1584	def _start(self, tag, attrib_in):
				1585	fixname = self._fixname
				1586	tag = fixname(tag)
				1587	attrib = {}
				1588	for key, value in attrib_in.items():
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1589	attrib[fixname(key)] = value
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1590	return self.target.start(tag, attrib)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1591
				1592	def _start_list(self, tag, attrib_in):
				1593	fixname = self._fixname
				1594	tag = fixname(tag)
				1595	attrib = {}
				1596	if attrib_in:
				1597	for i in range(0, len(attrib_in), 2):
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1598	attrib[fixname(attrib_in[i])] = attrib_in[i+1]
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1599	return self.target.start(tag, attrib)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1600
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1601	def _end(self, tag):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1602	return self.target.end(self._fixname(tag))
				1603
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1604	def _default(self, text):
				1605	prefix = text[:1]
				1606	if prefix == "&":
				1607	# deal with undefined entities
				1608	try:
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1609	data_handler = self.target.data
				1610	except AttributeError:
				1611	return
				1612	try:
				1613	data_handler(self.entity[text[1:-1]])
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1614	except KeyError:
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1615	from xml.parsers import expat
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1616	err = expat.error(
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1617	"undefined entity %s: line %d, column %d" %
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1618	(text, self.parser.ErrorLineNumber,
				1619	self.parser.ErrorColumnNumber)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1620	)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1621	err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1622	err.lineno = self.parser.ErrorLineNumber
				1623	err.offset = self.parser.ErrorColumnNumber
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1624	raise err
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1625	elif prefix == "<" and text[:9] == "<!DOCTYPE":
				1626	self._doctype = [] # inside a doctype declaration
				1627	elif self._doctype is not None:
				1628	# parse doctype contents
				1629	if prefix == ">":
				1630	self._doctype = None
				1631	return
Neal Norwitz	9d72bb4	2007-04-17 08:48:32 +0000	[diff] [blame]	1632	text = text.strip()
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1633	if not text:
				1634	return
				1635	self._doctype.append(text)
				1636	n = len(self._doctype)
				1637	if n > 2:
				1638	type = self._doctype[1]
				1639	if type == "PUBLIC" and n == 4:
				1640	name, type, pubid, system = self._doctype
Florent Xicluna	a1c974a	2012-07-07 13:16:44 +0200	[diff] [blame]	1641	if pubid:
				1642	pubid = pubid[1:-1]
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1643	elif type == "SYSTEM" and n == 3:
				1644	name, type, system = self._doctype
				1645	pubid = None
				1646	else:
				1647	return
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1648	if hasattr(self.target, "doctype"):
				1649	self.target.doctype(name, pubid, system[1:-1])
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1650	elif self.doctype != self._XMLParser__doctype:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1651	# warn about deprecated call
				1652	self._XMLParser__doctype(name, pubid, system[1:-1])
				1653	self.doctype(name, pubid, system[1:-1])
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1654	self._doctype = None
				1655
				1656	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1657	# (Deprecated) Handles a doctype declaration.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1658	#
				1659	# @param name Doctype name.
				1660	# @param pubid Public identifier.
				1661	# @param system System identifier.
				1662
				1663	def doctype(self, name, pubid, system):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1664	"""This method of XMLParser is deprecated."""
				1665	warnings.warn(
				1666	"This method of XMLParser is deprecated. Define doctype() "
				1667	"method on the TreeBuilder target.",
				1668	DeprecationWarning,
				1669	)
				1670
				1671	# sentinel, if doctype is redefined in a subclass
				1672	__doctype = doctype
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1673
				1674	##
				1675	# Feeds data to the parser.
				1676	#
				1677	# @param data Encoded data.
				1678
				1679	def feed(self, data):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1680	try:
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1681	self.parser.Parse(data, 0)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1682	except self._error as v:
				1683	self._raiseerror(v)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1684
				1685	##
				1686	# Finishes feeding data to the parser.
				1687	#
				1688	# @return An element structure.
				1689	# @defreturn Element
				1690
				1691	def close(self):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1692	try:
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1693	self.parser.Parse("", 1) # end of data
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1694	except self._error as v:
				1695	self._raiseerror(v)
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1696	try:
Florent Xicluna	fb06746	2012-03-05 11:42:49 +0100	[diff] [blame]	1697	close_handler = self.target.close
				1698	except AttributeError:
				1699	pass
				1700	else:
				1701	return close_handler()
Florent Xicluna	75b5e7e	2012-03-05 10:42:19 +0100	[diff] [blame]	1702	finally:
				1703	# get rid of circular references
				1704	del self.parser, self._parser
				1705	del self.target, self._target
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1706
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	1707
				1708	# Import the C accelerators
				1709	try:
				1710	# Element, SubElement, ParseError, TreeBuilder, XMLParser
				1711	from _elementtree import *
				1712	except ImportError:
				1713	pass
				1714	else:
				1715	# Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser
				1716
				1717	class ElementTree(ElementTree):
				1718	def parse(self, source, parser=None):
				1719	close_source = False
				1720	if not hasattr(source, 'read'):
				1721	source = open(source, 'rb')
				1722	close_source = True
				1723	try:
				1724	if parser is not None:
				1725	while True:
				1726	data = source.read(65536)
				1727	if not data:
				1728	break
				1729	parser.feed(data)
				1730	self._root = parser.close()
				1731	else:
				1732	parser = XMLParser()
				1733	self._root = parser._parse(source)
				1734	return self._root
				1735	finally:
				1736	if close_source:
				1737	source.close()
				1738
				1739	class iterparse:
Eli Bendersky	aaa9780	2013-01-24 07:15:19 -0800	[diff] [blame]	1740	"""Parses an XML section into an element tree incrementally.
				1741
				1742	Reports what’s going on to the user. 'source' is a filename or file
				1743	object containing XML data. 'events' is a list of events to report back.
				1744	The supported events are the strings "start", "end", "start-ns" and
				1745	"end-ns" (the "ns" events are used to get detailed namespace
				1746	information). If 'events' is omitted, only "end" events are reported.
				1747	'parser' is an optional parser instance. If not given, the standard
				1748	XMLParser parser is used. Returns an iterator providing
				1749	(event, elem) pairs.
				1750	"""
				1751
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	1752	root = None
Eli Bendersky	aaa9780	2013-01-24 07:15:19 -0800	[diff] [blame]	1753	def __init__(self, file, events=None, parser=None):
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	1754	self._close_file = False
				1755	if not hasattr(file, 'read'):
				1756	file = open(file, 'rb')
				1757	self._close_file = True
				1758	self._file = file
				1759	self._events = []
				1760	self._index = 0
				1761	self._error = None
				1762	self.root = self._root = None
Eli Bendersky	aaa9780	2013-01-24 07:15:19 -0800	[diff] [blame]	1763	if parser is None:
				1764	parser = XMLParser(target=TreeBuilder())
				1765	self._parser = parser
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	1766	self._parser._setevents(self._events, events)
				1767
				1768	def __next__(self):
				1769	while True:
				1770	try:
				1771	item = self._events[self._index]
				1772	self._index += 1
				1773	return item
				1774	except IndexError:
				1775	pass
				1776	if self._error:
				1777	e = self._error
				1778	self._error = None
				1779	raise e
				1780	if self._parser is None:
				1781	self.root = self._root
				1782	if self._close_file:
				1783	self._file.close()
				1784	raise StopIteration
				1785	# load event buffer
				1786	del self._events[:]
				1787	self._index = 0
				1788	data = self._file.read(16384)
				1789	if data:
				1790	try:
				1791	self._parser.feed(data)
				1792	except SyntaxError as exc:
				1793	self._error = exc
				1794	else:
				1795	self._root = self._parser.close()
				1796	self._parser = None
				1797
				1798	def __iter__(self):
				1799	return self
				1800
# compatibility
# XMLTreeBuilder is kept as a second name bound to the same XMLParser class.
XMLTreeBuilder = XMLParser
				1803
# workaround circular import.
# Optional registration of the C14N serializer: when ElementC14N can be
# imported, its _serialize_c14n is stored under the "c14n" key of
# _serialize; when the import fails, the registration is skipped silently.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass