Blame - Lib/xml/etree/ElementTree.py - platform/external/python/cpython2

blob: defef0d0ebca36dc35f85c1d22490763feffe736 [file] [log] [blame]

Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1	#
				2	# ElementTree
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	3	# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	4	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	5	# light-weight XML support for Python 2.3 and later.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	6	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	7	# history (since 1.2.6):
				8	# 2005-11-12 fl added tostringlist/fromstringlist helpers
				9	# 2006-07-05 fl merged in selected changes from the 1.3 sandbox
				10	# 2006-07-05 fl removed support for 2.1 and earlier
				11	# 2007-06-21 fl added deprecation/future warnings
				12	# 2007-08-25 fl added doctype hook, added parser version attribute etc
				13	# 2007-08-26 fl added new serializer code (better namespace handling, etc)
				14	# 2007-08-27 fl warn for broken /tag searches on tree level
				15	# 2007-09-02 fl added html/text methods to serializer (experimental)
				16	# 2007-09-05 fl added method argument to tostring/tostringlist
				17	# 2007-09-06 fl improved error handling
				18	# 2007-09-13 fl added itertext, iterfind; assorted cleanups
				19	# 2007-12-15 fl added C14N hooks, copy method (experimental)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	20	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	21	# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	22	#
				23	# fredrik@pythonware.com
				24	# http://www.pythonware.com
				25	#
				26	# --------------------------------------------------------------------
				27	# The ElementTree toolkit is
				28	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	29	# Copyright (c) 1999-2008 by Fredrik Lundh
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	30	#
				31	# By obtaining, using, and/or copying this software and/or its
				32	# associated documentation, you agree that you have read, understood,
				33	# and will comply with the following terms and conditions:
				34	#
				35	# Permission to use, copy, modify, and distribute this software and
				36	# its associated documentation for any purpose and without fee is
				37	# hereby granted, provided that the above copyright notice appears in
				38	# all copies, and that both that copyright notice and this permission
				39	# notice appear in supporting documentation, and that the name of
				40	# Secret Labs AB or the author not be used in advertising or publicity
				41	# pertaining to distribution of the software without specific, written
				42	# prior permission.
				43	#
				44	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
				45	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
				46	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
				47	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
				48	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
				49	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
				50	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
				51	# OF THIS SOFTWARE.
				52	# --------------------------------------------------------------------
				53
Fredrik Lundh	63168a5	2005-12-14 22:29:34 +0000	[diff] [blame]	54	# Licensed to PSF under a Contributor Agreement.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	55	# See http://www.python.org/psf/license for licensing details.
Fredrik Lundh	63168a5	2005-12-14 22:29:34 +0000	[diff] [blame]	56
# Names exported by "from <module> import *".
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser", "XMLTreeBuilder",
    "register_namespace",
    ]

# Version string published by this module (also listed in __all__).
VERSION = "1.3.0"
				77
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	78	##
				79	# The <b>Element</b> type is a flexible container object, designed to
				80	# store hierarchical data structures in memory. The type can be
				81	# described as a cross between a list and a dictionary.
				82	# <p>
				83	# Each element has a number of properties associated with it:
				84	# <ul>
				85	# <li>a <i>tag</i>. This is a string identifying what kind of data
				86	# this element represents (the element type, in other words).</li>
				87	# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
				88	# <li>a <i>text</i> string.</li>
				89	# <li>an optional <i>tail</i> string.</li>
				90	# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
				91	# </ul>
				92	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	93	# To create an element instance, use the {@link #Element} constructor
				94	# or the {@link #SubElement} factory function.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	95	# <p>
				96	# The {@link #ElementTree} class can be used to wrap an element
				97	# structure, and convert it from and to XML.
				98	##
				99
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	100	import sys
				101	import re
				102	import warnings
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	103
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	104
class _SimpleElementPath:
    # Minimal stand-in for the ElementPath module, installed below when
    # that module cannot be imported.  It emulates the pre-1.2
    # find/findtext/findall behaviour: a plain tag name is compared with
    # the tags of the direct children only, and a path starting with
    # ".//" is answered by element.iter() using the rest of the path as
    # the tag.  The namespaces argument is accepted for interface
    # compatibility only; no method here interprets it.

    def find(self, element, tag, namespaces=None):
        # Return the first direct child whose tag equals *tag*, or None.
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        # Return the text of the first matching direct child, "" when
        # that child has no text, or *default* when nothing matches.
        elem = self.find(element, tag, namespaces)
        if elem is None:
            return default
        return elem.text or ""

    def iterfind(self, element, tag, namespaces=None):
        # Generate the matching elements in the order they are found.
        if tag[:3] == ".//":
            for elem in element.iter(tag[3:]):
                yield elem
            # The ".//" form is fully answered by element.iter(); stop
            # here instead of falling through and comparing the
            # unstripped path against the direct children's tags.
            return
        for elem in element:
            if elem.tag == tag:
                yield elem

    def findall(self, element, tag, namespaces=None):
        # Same matches as iterfind(), collected into a list.
        return list(self.iterfind(element, tag, namespaces))

# Prefer the real ElementPath module; fall back to the simple emulation
# above when it cannot be imported.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	131
				132	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	133	# Parser error. This is a subclass of <b>SyntaxError</b>.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	134	# <p>
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	135	# In addition to the exception value, an exception instance contains a
				136	# specific exception code in the <b>code</b> attribute, and the line and
				137	# column of the error in the <b>position</b> attribute.
				138
				139	class ParseError(SyntaxError):
				140	pass
				141
				142	# --------------------------------------------------------------------
				143
				144	##
				145	# Checks if an object appears to be a valid element object.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	146	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	147	# @param element An element instance.
				148	# @return A true value if this is an element object.
				149	# @defreturn flag
				150
				151	def iselement(element):
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	152	# FIXME: not sure about this;
				153	# isinstance(element, Element) or look for tag/attrib/text attributes
				154	return hasattr(element, 'tag')
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	155
				156	##
				157	# Element class. This class defines the Element interface, and
				158	# provides a reference implementation of this interface.
				159	# <p>
				160	# The element name, attribute names, and attribute values can be
				161	# either ASCII strings (ordinary Python strings containing only 7-bit
				162	# ASCII characters) or Unicode strings.
				163	#
				164	# @param tag The element name.
				165	# @param attrib An optional dictionary, containing element attributes.
				166	# @param **extra Additional attributes, given as keyword arguments.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	167	# @see Element
				168	# @see SubElement
				169	# @see Comment
				170	# @see ProcessingInstruction
				171
class Element:
    # Layout of a serialized element: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Dictionary holding the element attributes.  Where
    # possible, go through
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} rather than touching it directly.

    attrib = None

    ##
    # (Attribute) Text before the first subelement.  Either a string
    # or None.  An element without text may hold None or an empty
    # string, depending on the parser.

    text = None

    ##
    # (Attribute) Text that follows this element's end tag and precedes
    # the next sibling's start tag.  Either a string or None.  When
    # there is no such text the value may be None or an empty string,
    # depending on the parser.

    tail = None

    # constructor

    def __init__(self, tag, attrib={}, **extra):
        # Merge into a private copy, so that neither the caller's
        # dictionary nor the shared default value is ever modified.
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        details = (repr(self.tag), id(self))
        return "<Element %s at 0x%x>" % details

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Copies the current element.  The copy is shallow:
    # it gets its own attribute dictionary and child list, but the
    # subelements themselves are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        clone = self.makeelement(self.tag, self.attrib)
        clone.text, clone.tail = self.text, self.tail
        clone[:] = self
        return clone

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; to find out whether an element has any content at all,
    # check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        message = (
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead."
            )
        warnings.warn(message, FutureWarning, stacklevel=2)
        # emulate old behaviour, for now: true only when there are children
        child_count = len(self._children)
        return child_count != 0

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index.  The new value is stored
    # as given; it is not checked to be an element.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        self._children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement to the end of this element.  In document order
    # the new element comes after the last existing subelement (or
    # right after the text, if it is the first subelement) and before
    # this element's end tag.
    #
    # @param element The element to add.

    def append(self, element):
        self._children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        self._children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # this method compares elements based on identity, not on tag
    # value or contents.  To remove subelements by other criteria, it
    # is often easiest to select the elements to keep with a list
    # comprehension and update the parent through slice assignment.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  The
    # value handed back is the element's own child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning,
            stacklevel=2,
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.  The
    # search is delegated to ElementPath.find.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    # The search is delegated to ElementPath.findtext.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.  The search
    # is delegated to ElementPath.findall.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #     in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.  The search
    # is delegated to ElementPath.iterfind.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #     in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element.  All subelements are dropped, the attribute
    # dictionary is emptied in place, and the <b>text</b> and
    # <b>tail</b> attributes are set to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets the attribute names.  Equivalent to <b>attrib.keys()</b>:
    # the result is whatever that call returns, in the dictionary's
    # own order.
    #
    # @return The element attribute names.
    # @defreturn the result of attrib.keys()

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets the element attributes as (name, value) pairs.  Equivalent
    # to <b>attrib.items()</b>: the result is whatever that call
    # returns, in the dictionary's own order.
    #
    # @return The (name, value) pairs for all attributes.
    # @defreturn the result of attrib.items()

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The iterator visits this element and
    # all subelements, in document order, and returns every element
    # with a matching tag.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for.  None (the default) and "*"
    #     both select all elements.
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        # "*" is just another spelling of "no filter"
        wanted = None if tag == "*" else tag
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for match in child.iter(wanted):
                yield match

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning,
            stacklevel=2,
            )
        matches = self.iter(tag)
        return list(matches)

    ##
    # Creates a text iterator.  The iterator visits this element and
    # all subelements, in document order, and returns all inner text:
    # the element's own text, then for each child its inner text
    # followed by its tail.  An element whose tag is neither a string
    # nor None yields nothing at all.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        if not (tag is None or isinstance(tag, str)):
            return
        own_text = self.text
        if own_text:
            yield own_text
        for child in self:
            for piece in child.itertext():
                yield piece
            trailing = child.tail
            if trailing:
                yield trailing

# compatibility: older names for the Element class
_Element = Element
_ElementInterface = Element
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	513
				514	##
				515	# Subelement factory. This function creates an element instance, and
				516	# appends it to an existing element.
				517	# <p>
				518	# The element name, attribute names, and attribute values can be
				519	# either ASCII strings or Unicode strings.
				520	#
				521	# @param parent The parent element.
				522	# @param tag The subelement name.
				523	# @param attrib An optional dictionary, containing element attributes.
				524	# @param **extra Additional attributes, given as keyword arguments.
				525	# @return An element instance.
				526	# @defreturn Element
				527
				528	def SubElement(parent, tag, attrib={}, **extra):
				529	attrib = attrib.copy()
				530	attrib.update(extra)
				531	element = parent.makeelement(tag, attrib)
				532	parent.append(element)
				533	return element
				534
				535	##
				536	# Comment element factory. This factory function creates a special
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	537	# element that will be serialized as an XML comment by the standard
				538	# serializer.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	539	# <p>
				540	# The comment string can be either an ASCII string or a Unicode
				541	# string.
				542	#
				543	# @param text A string containing the comment string.
				544	# @return An element instance, representing a comment.
				545	# @defreturn Element
				546
				547	def Comment(text=None):
				548	element = Element(Comment)
				549	element.text = text
				550	return element
				551
				552	##
				553	# PI element factory. This factory function creates a special element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	554	# that will be serialized as an XML processing instruction by the standard
				555	# serializer.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	556	#
				557	# @param target A string containing the PI target.
				558	# @param text A string containing the PI contents, if any.
				559	# @return An element instance, representing a PI.
				560	# @defreturn Element
				561
				562	def ProcessingInstruction(target, text=None):
				563	element = Element(ProcessingInstruction)
				564	element.text = target
				565	if text:
				566	element.text = element.text + " " + text
				567	return element
				568
				569	PI = ProcessingInstruction
				570
				571	##
				572	# QName wrapper. This can be used to wrap a QName attribute value, in
				573	# order to get proper namespace handling on output.
				574	#
				575	# @param text A string containing the QName value, in the form {uri}local,
				576	# or, if the tag argument is given, the URI part of a QName.
				577	# @param tag Optional tag. If given, the first argument is interpreted as
				578	# an URI, and this argument is interpreted as a local name.
				579	# @return An opaque object, representing the QName.
				580
				581	class QName:
				582	def __init__(self, text_or_uri, tag=None):
				583	if tag:
				584	text_or_uri = "{%s}%s" % (text_or_uri, tag)
				585	self.text = text_or_uri
				586	def __str__(self):
				587	return self.text
Georg Brandl	b56c0e2	2010-12-09 18:10:27 +0000	[diff] [blame]	588	def __repr__(self):
Georg Brandl	c95c918	2010-12-09 18:26:02 +0000	[diff] [blame]	589	return '<QName %r>' % (self.text,)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	590	def __hash__(self):
				591	return hash(self.text)
Mark Dickinson	a56c467	2009-01-27 18:17:45 +0000	[diff] [blame]	592	def __le__(self, other):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	593	if isinstance(other, QName):
Mark Dickinson	a56c467	2009-01-27 18:17:45 +0000	[diff] [blame]	594	return self.text <= other.text
				595	return self.text <= other
				596	def __lt__(self, other):
				597	if isinstance(other, QName):
				598	return self.text < other.text
				599	return self.text < other
				600	def __ge__(self, other):
				601	if isinstance(other, QName):
				602	return self.text >= other.text
				603	return self.text >= other
				604	def __gt__(self, other):
				605	if isinstance(other, QName):
				606	return self.text > other.text
				607	return self.text > other
				608	def __eq__(self, other):
				609	if isinstance(other, QName):
				610	return self.text == other.text
				611	return self.text == other
				612	def __ne__(self, other):
				613	if isinstance(other, QName):
				614	return self.text != other.text
				615	return self.text != other
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	616
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	617	# --------------------------------------------------------------------
				618
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	619	##
				620	# ElementTree wrapper class. This class represents an entire element
				621	# hierarchy, and adds some extra support for serialization to and from
				622	# standard XML.
				623	#
				624	# @param element Optional root element.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	625	# @keyparam file Optional file handle or file name. If given, the
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	626	# tree is initialized with the contents of this XML file.
				627
				628	class ElementTree:
				629
				630	def __init__(self, element=None, file=None):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	631	# assert element is None or iselement(element)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	632	self._root = element # first node
				633	if file:
				634	self.parse(file)
				635
				636	##
				637	# Gets the root element for this tree.
				638	#
				639	# @return An element instance.
				640	# @defreturn Element
				641
				642	def getroot(self):
				643	return self._root
				644
				645	##
				646	# Replaces the root element for this tree. This discards the
				647	# current contents of the tree, and replaces it with the given
				648	# element. Use with care.
				649	#
				650	# @param element An element instance.
				651
				652	def _setroot(self, element):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	653	# assert iselement(element)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	654	self._root = element
				655
				656	##
				657	# Loads an external XML document into this element tree.
				658	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	659	# @param source A file name or file object. If a file object is
				660	# given, it only has to implement a <b>read(n)</b> method.
				661	# @keyparam parser An optional parser instance. If not given, the
				662	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	663	# @return The document root element.
				664	# @defreturn Element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	665	# @exception ParseError If the parser fails to parse the document.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	666
				667	def parse(self, source, parser=None):
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	668	close_source = False
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	669	if not hasattr(source, "read"):
				670	source = open(source, "rb")
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	671	close_source = True
				672	try:
				673	if not parser:
				674	parser = XMLParser(target=TreeBuilder())
				675	while 1:
				676	data = source.read(65536)
				677	if not data:
				678	break
				679	parser.feed(data)
				680	self._root = parser.close()
				681	return self._root
				682	finally:
				683	if close_source:
				684	source.close()
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	685
				686	##
				687	# Creates a tree iterator for the root element. The iterator loops
				688	# over all elements in this tree, in document order.
				689	#
				690	# @param tag What tags to look for (default is to return all elements)
				691	# @return An iterator.
				692	# @defreturn iterator
				693
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	694	def iter(self, tag=None):
				695	# assert self._root is not None
				696	return self._root.iter(tag)
				697
				698	# compatibility
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	699	def getiterator(self, tag=None):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	700	# Change for a DeprecationWarning in 1.4
				701	warnings.warn(
				702	"This method will be removed in future versions. "
				703	"Use 'tree.iter()' or 'list(tree.iter())' instead.",
				704	PendingDeprecationWarning, stacklevel=2
				705	)
				706	return list(self.iter(tag))
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	707
				708	##
				709	# Finds the first toplevel element with given tag.
				710	# Same as getroot().find(path).
				711	#
				712	# @param path What element to look for.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	713	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	714	# @return The first matching element, or None if no element was found.
				715	# @defreturn Element or None
				716
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	717	def find(self, path, namespaces=None):
				718	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	719	if path[:1] == "/":
				720	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	721	warnings.warn(
				722	"This search is broken in 1.3 and earlier, and will be "
				723	"fixed in a future version. If you rely on the current "
				724	"behaviour, change it to %r" % path,
				725	FutureWarning, stacklevel=2
				726	)
				727	return self._root.find(path, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	728
				729	##
				730	# Finds the element text for the first toplevel element with given
				731	# tag. Same as getroot().findtext(path).
				732	#
				733	# @param path What toplevel element to look for.
				734	# @param default What to return if the element was not found.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	735	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	736	# @return The text content of the first matching element, or the
				737	# default value no element was found. Note that if the element
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	738	# is found, but has no text content, this method returns an
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	739	# empty string.
				740	# @defreturn string
				741
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	742	def findtext(self, path, default=None, namespaces=None):
				743	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	744	if path[:1] == "/":
				745	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	746	warnings.warn(
				747	"This search is broken in 1.3 and earlier, and will be "
				748	"fixed in a future version. If you rely on the current "
				749	"behaviour, change it to %r" % path,
				750	FutureWarning, stacklevel=2
				751	)
				752	return self._root.findtext(path, default, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	753
				754	##
				755	# Finds all toplevel elements with the given tag.
				756	# Same as getroot().findall(path).
				757	#
				758	# @param path What element to look for.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	759	# @keyparam namespaces Optional namespace prefix map.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	760	# @return A list or iterator containing all matching elements,
				761	# in document order.
				762	# @defreturn list of Element instances
				763
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	764	def findall(self, path, namespaces=None):
				765	# assert self._root is not None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	766	if path[:1] == "/":
				767	path = "." + path
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	768	warnings.warn(
				769	"This search is broken in 1.3 and earlier, and will be "
				770	"fixed in a future version. If you rely on the current "
				771	"behaviour, change it to %r" % path,
				772	FutureWarning, stacklevel=2
				773	)
				774	return self._root.findall(path, namespaces)
				775
				776	##
				777	# Finds all matching subelements, by tag name or path.
				778	# Same as getroot().iterfind(path).
				779	#
				780	# @param path What element to look for.
				781	# @keyparam namespaces Optional namespace prefix map.
				782	# @return An iterator or sequence containing all matching elements,
				783	# in document order.
				784	# @defreturn a generated sequence of Element instances
				785
				786	def iterfind(self, path, namespaces=None):
				787	# assert self._root is not None
				788	if path[:1] == "/":
				789	path = "." + path
				790	warnings.warn(
				791	"This search is broken in 1.3 and earlier, and will be "
				792	"fixed in a future version. If you rely on the current "
				793	"behaviour, change it to %r" % path,
				794	FutureWarning, stacklevel=2
				795	)
				796	return self._root.iterfind(path, namespaces)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	797
				798	##
				799	# Writes the element tree to a file, as XML.
				800	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	801	# @def write(file, **options)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	802	# @param file A file name, or a file object opened for writing.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	803	# @param **options Options, given as keyword arguments.
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	804	# @keyparam encoding Optional output encoding (default is US-ASCII).
				805	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	806	# @keyparam method Optional output method ("xml", "html", "text" or
				807	# "c14n"; default is "xml").
				808	# @keyparam xml_declaration Controls if an XML declaration should
				809	# be added to the file. Use False for never, True for always,
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	810	# None for only if not US-ASCII or UTF-8 or Unicode. None is default.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	811
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	812	def write(self, file_or_filename,
				813	# keyword arguments
				814	encoding=None,
				815	xml_declaration=None,
				816	default_namespace=None,
				817	method=None):
				818	# assert self._root is not None
				819	if not method:
				820	method = "xml"
				821	elif method not in _serialize:
				822	# FIXME: raise an ImportError for c14n if ElementC14N is missing?
				823	raise ValueError("unknown method %r" % method)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	824	if not encoding:
				825	if method == "c14n":
				826	encoding = "utf-8"
				827	else:
				828	encoding = "us-ascii"
				829	elif encoding == str: # lxml.etree compatibility.
				830	encoding = "unicode"
				831	else:
				832	encoding = encoding.lower()
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	833	if hasattr(file_or_filename, "write"):
				834	file = file_or_filename
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	835	else:
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	836	if encoding != "unicode":
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	837	file = open(file_or_filename, "wb")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	838	else:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	839	file = open(file_or_filename, "w")
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	840	if encoding != "unicode":
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	841	def write(text):
				842	try:
				843	return file.write(text.encode(encoding,
				844	"xmlcharrefreplace"))
				845	except (TypeError, AttributeError):
				846	_raise_serialization_error(text)
				847	else:
				848	write = file.write
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	849	if method == "xml" and (xml_declaration or
				850	(xml_declaration is None and
				851	encoding not in ("utf-8", "us-ascii", "unicode"))):
				852	declared_encoding = encoding
				853	if encoding == "unicode":
				854	# Retrieve the default encoding for the xml declaration
				855	import locale
				856	declared_encoding = locale.getpreferredencoding()
				857	write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	858	if method == "text":
				859	_serialize_text(write, self._root)
				860	else:
				861	qnames, namespaces = _namespaces(self._root, default_namespace)
				862	serialize = _serialize[method]
				863	serialize(write, self._root, qnames, namespaces)
				864	if file_or_filename is not file:
				865	file.close()
				866
				867	def write_c14n(self, file):
				868	# lxml.etree compatibility. use output method instead
				869	return self.write(file, method="c14n")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	870
				871	# --------------------------------------------------------------------
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	872	# serialization support
				873
				874	def _namespaces(elem, default_namespace=None):
				875	# identify namespaces used in this tree
				876
				877	# maps qnames to encoded prefix:local names
				878	qnames = {None: None}
				879
				880	# maps uri:s to prefixes
				881	namespaces = {}
				882	if default_namespace:
				883	namespaces[default_namespace] = ""
				884
				885	def add_qname(qname):
				886	# calculate serialized qname representation
				887	try:
				888	if qname[:1] == "{":
				889	uri, tag = qname[1:].rsplit("}", 1)
				890	prefix = namespaces.get(uri)
				891	if prefix is None:
				892	prefix = _namespace_map.get(uri)
				893	if prefix is None:
				894	prefix = "ns%d" % len(namespaces)
				895	if prefix != "xml":
				896	namespaces[uri] = prefix
				897	if prefix:
				898	qnames[qname] = "%s:%s" % (prefix, tag)
				899	else:
				900	qnames[qname] = tag # default element
				901	else:
				902	if default_namespace:
				903	# FIXME: can this be handled in XML 1.0?
				904	raise ValueError(
				905	"cannot use non-qualified names with "
				906	"default_namespace option"
				907	)
				908	qnames[qname] = qname
				909	except TypeError:
				910	_raise_serialization_error(qname)
				911
				912	# populate qname and namespaces table
				913	try:
				914	iterate = elem.iter
				915	except AttributeError:
				916	iterate = elem.getiterator # cET compatibility
				917	for elem in iterate():
				918	tag = elem.tag
Senthil Kumaran	ec30b3d	2010-11-09 02:36:59 +0000	[diff] [blame]	919	if isinstance(tag, QName):
				920	if tag.text not in qnames:
				921	add_qname(tag.text)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	922	elif isinstance(tag, str):
				923	if tag not in qnames:
				924	add_qname(tag)
				925	elif tag is not None and tag is not Comment and tag is not PI:
				926	_raise_serialization_error(tag)
				927	for key, value in elem.items():
				928	if isinstance(key, QName):
				929	key = key.text
				930	if key not in qnames:
				931	add_qname(key)
				932	if isinstance(value, QName) and value.text not in qnames:
				933	add_qname(value.text)
				934	text = elem.text
				935	if isinstance(text, QName) and text.text not in qnames:
				936	add_qname(text.text)
				937	return qnames, namespaces
				938
				939	def _serialize_xml(write, elem, qnames, namespaces):
				940	tag = elem.tag
				941	text = elem.text
				942	if tag is Comment:
				943	write("<!--%s-->" % text)
				944	elif tag is ProcessingInstruction:
				945	write("<?%s?>" % text)
				946	else:
				947	tag = qnames[tag]
				948	if tag is None:
				949	if text:
				950	write(_escape_cdata(text))
				951	for e in elem:
				952	_serialize_xml(write, e, qnames, None)
				953	else:
				954	write("<" + tag)
				955	items = list(elem.items())
				956	if items or namespaces:
				957	if namespaces:
				958	for v, k in sorted(namespaces.items(),
				959	key=lambda x: x[1]): # sort on prefix
				960	if k:
				961	k = ":" + k
				962	write(" xmlns%s=\"%s\"" % (
				963	k,
				964	_escape_attrib(v)
				965	))
				966	for k, v in sorted(items): # lexical order
				967	if isinstance(k, QName):
				968	k = k.text
				969	if isinstance(v, QName):
				970	v = qnames[v.text]
				971	else:
				972	v = _escape_attrib(v)
				973	write(" %s=\"%s\"" % (qnames[k], v))
				974	if text or len(elem):
				975	write(">")
				976	if text:
				977	write(_escape_cdata(text))
				978	for e in elem:
				979	_serialize_xml(write, e, qnames, None)
				980	write("</" + tag + ">")
				981	else:
				982	write(" />")
				983	if elem.tail:
				984	write(_escape_cdata(elem.tail))
				985
				986	HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
				987	"img", "input", "isindex", "link", "meta" "param")
				988
				989	try:
				990	HTML_EMPTY = set(HTML_EMPTY)
				991	except NameError:
				992	pass
				993
				994	def _serialize_html(write, elem, qnames, namespaces):
				995	tag = elem.tag
				996	text = elem.text
				997	if tag is Comment:
				998	write("<!--%s-->" % _escape_cdata(text))
				999	elif tag is ProcessingInstruction:
				1000	write("<?%s?>" % _escape_cdata(text))
				1001	else:
				1002	tag = qnames[tag]
				1003	if tag is None:
				1004	if text:
				1005	write(_escape_cdata(text))
				1006	for e in elem:
				1007	_serialize_html(write, e, qnames, None)
				1008	else:
				1009	write("<" + tag)
				1010	items = list(elem.items())
				1011	if items or namespaces:
				1012	if namespaces:
				1013	for v, k in sorted(namespaces.items(),
				1014	key=lambda x: x[1]): # sort on prefix
				1015	if k:
				1016	k = ":" + k
				1017	write(" xmlns%s=\"%s\"" % (
				1018	k,
				1019	_escape_attrib(v)
				1020	))
				1021	for k, v in sorted(items): # lexical order
				1022	if isinstance(k, QName):
				1023	k = k.text
				1024	if isinstance(v, QName):
				1025	v = qnames[v.text]
				1026	else:
				1027	v = _escape_attrib_html(v)
				1028	# FIXME: handle boolean attributes
				1029	write(" %s=\"%s\"" % (qnames[k], v))
				1030	write(">")
				1031	tag = tag.lower()
				1032	if text:
				1033	if tag == "script" or tag == "style":
				1034	write(text)
				1035	else:
				1036	write(_escape_cdata(text))
				1037	for e in elem:
				1038	_serialize_html(write, e, qnames, None)
				1039	if tag not in HTML_EMPTY:
				1040	write("</" + tag + ">")
				1041	if elem.tail:
				1042	write(_escape_cdata(elem.tail))
				1043
				1044	def _serialize_text(write, elem):
				1045	for part in elem.itertext():
				1046	write(part)
				1047	if elem.tail:
				1048	write(elem.tail)
				1049
				1050	_serialize = {
				1051	"xml": _serialize_xml,
				1052	"html": _serialize_html,
				1053	"text": _serialize_text,
				1054	# this optional method is imported at the end of the module
				1055	# "c14n": _serialize_c14n,
				1056	}
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1057
				1058	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1059	# Registers a namespace prefix. The registry is global, and any
				1060	# existing mapping for either the given prefix or the namespace URI
				1061	# will be removed.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1062	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1063	# @param prefix Namespace prefix.
				1064	# @param uri Namespace uri. Tags and attributes in this namespace
				1065	# will be serialized with the given prefix, if at all possible.
				1066	# @exception ValueError If the prefix is reserved, or is otherwise
				1067	# invalid.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1068
def register_namespace(prefix, uri):
    # Prefixes of the form "ns<digits>" are generated by the serializer
    # for unregistered namespaces, so they cannot be claimed by callers.
    # (raw string: "\d" is an invalid escape in a normal string literal)
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Drop any mapping that already uses this uri or this prefix.  Iterate
    # over a snapshot because the registry is modified inside the loop.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1091
				1092	def _raise_serialization_error(text):
				1093	raise TypeError(
				1094	"cannot serialize %r (type %s)" % (text, type(text).__name__)
				1095	)
				1096
				1097	def _escape_cdata(text):
				1098	# escape character data
				1099	try:
				1100	# it's worth avoiding do-nothing calls for strings that are
				1101	# shorter than 500 character, or so. assume that's, by far,
				1102	# the most common case in most applications.
				1103	if "&" in text:
				1104	text = text.replace("&", "&")
				1105	if "<" in text:
				1106	text = text.replace("<", "<")
				1107	if ">" in text:
				1108	text = text.replace(">", ">")
				1109	return text
				1110	except (TypeError, AttributeError):
				1111	_raise_serialization_error(text)
				1112
				1113	def _escape_attrib(text):
				1114	# escape attribute value
				1115	try:
				1116	if "&" in text:
				1117	text = text.replace("&", "&")
				1118	if "<" in text:
				1119	text = text.replace("<", "<")
				1120	if ">" in text:
				1121	text = text.replace(">", ">")
				1122	if "\"" in text:
				1123	text = text.replace("\"", """)
				1124	if "\n" in text:
				1125	text = text.replace("\n", " ")
				1126	return text
				1127	except (TypeError, AttributeError):
				1128	_raise_serialization_error(text)
				1129
				1130	def _escape_attrib_html(text):
				1131	# escape attribute value
				1132	try:
				1133	if "&" in text:
				1134	text = text.replace("&", "&")
				1135	if ">" in text:
				1136	text = text.replace(">", ">")
				1137	if "\"" in text:
				1138	text = text.replace("\"", """)
				1139	return text
				1140	except (TypeError, AttributeError):
				1141	_raise_serialization_error(text)
				1142
				1143	# --------------------------------------------------------------------
				1144
				1145	##
				1146	# Generates a string representation of an XML element, including all
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1147	# subelements. If encoding is "unicode", the return type is a string;
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1148	# otherwise it is a bytes array.
				1149	#
				1150	# @param element An Element instance.
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1151	# @keyparam encoding Optional output encoding (default is US-ASCII).
				1152	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1153	# @keyparam method Optional output method ("xml", "html", "text" or
				1154	# "c14n"; default is "xml").
				1155	# @return An (optionally) encoded string containing the XML data.
				1156	# @defreturn string
				1157
				1158	def tostring(element, encoding=None, method=None):
				1159	class dummy:
				1160	pass
				1161	data = []
				1162	file = dummy()
				1163	file.write = data.append
				1164	ElementTree(element).write(file, encoding, method=method)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1165	if encoding in (str, "unicode"):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1166	return "".join(data)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1167	else:
				1168	return b"".join(data)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1169
				1170	##
				1171	# Generates a string representation of an XML element, including all
# subelements.  If encoding is "unicode", the result is a sequence of
# string fragments; otherwise it is a sequence of bytestrings.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1175	#
				1176	# @param element An Element instance.
				1177	# @keyparam encoding Optional output encoding (default is US-ASCII).
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1178	# Use "unicode" to return a Unicode string.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1179	# @keyparam method Optional output method ("xml", "html", "text" or
				1180	# "c14n"; default is "xml").
				1181	# @return A sequence object containing the XML data.
				1182	# @defreturn sequence
				1183	# @since 1.3
				1184
				1185	def tostringlist(element, encoding=None, method=None):
				1186	class dummy:
				1187	pass
				1188	data = []
				1189	file = dummy()
				1190	file.write = data.append
				1191	ElementTree(element).write(file, encoding, method=method)
				1192	# FIXME: merge small fragments into larger parts
				1193	return data
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1194
				1195	##
				1196	# Writes an element tree or element structure to sys.stdout. This
				1197	# function should be used for debugging only.
				1198	# <p>
				1199	# The exact output format is implementation dependent. In this
				1200	# version, it's written as an ordinary XML file.
				1201	#
				1202	# @param elem An element tree or an individual element.
				1203
				1204	def dump(elem):
				1205	# debugging
				1206	if not isinstance(elem, ElementTree):
				1207	elem = ElementTree(elem)
Florent Xicluna	c17f172	2010-08-08 19:48:29 +0000	[diff] [blame]	1208	elem.write(sys.stdout, encoding="unicode")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1209	tail = elem.getroot().tail
				1210	if not tail or tail[-1] != "\n":
				1211	sys.stdout.write("\n")
				1212
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1213	# --------------------------------------------------------------------
				1214	# parsing
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1215
				1216	##
				1217	# Parses an XML document into an element tree.
				1218	#
				1219	# @param source A filename or file object containing XML data.
				1220	# @param parser An optional parser instance. If not given, the
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1221	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1222	# @return An ElementTree instance
				1223
				1224	def parse(source, parser=None):
				1225	tree = ElementTree()
				1226	tree.parse(source, parser)
				1227	return tree
				1228
				1229	##
				1230	# Parses an XML document into an element tree incrementally, and reports
				1231	# what's going on to the user.
				1232	#
				1233	# @param source A filename or file object containing XML data.
				1234	# @param events A list of events to report back. If omitted, only "end"
				1235	# events are reported.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1236	# @param parser An optional parser instance. If not given, the
				1237	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1238	# @return A (event, elem) iterator.
				1239
def iterparse(source, events=None, parser=None):
    close_source = False
    if not hasattr(source, "read"):
        # a file name was given; the iterator takes over closing the file
        # once it has been created
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except BaseException:
        # creating the parser or the iterator failed (for instance on an
        # unknown event name), so nobody else will close the file we opened
        if close_source:
            source.close()
        raise
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1248
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1249	class _IterParseIterator:
				1250
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	1251	def __init__(self, source, events, parser, close_source=False):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1252	self._file = source
Antoine Pitrou	e033e06	2010-10-29 10:38:18 +0000	[diff] [blame]	1253	self._close_file = close_source
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1254	self._events = []
				1255	self._index = 0
Florent Xicluna	91d5193	2011-11-01 23:31:09 +0100	[diff] [blame]	1256	self._error = None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1257	self.root = self._root = None
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1258	self._parser = parser
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1259	# wire up the parser for event reporting
				1260	parser = self._parser._parser
				1261	append = self._events.append
				1262	if events is None:
				1263	events = ["end"]
				1264	for event in events:
				1265	if event == "start":
				1266	try:
				1267	parser.ordered_attributes = 1
				1268	parser.specified_attributes = 1
				1269	def handler(tag, attrib_in, event=event, append=append,
				1270	start=self._parser._start_list):
				1271	append((event, start(tag, attrib_in)))
				1272	parser.StartElementHandler = handler
				1273	except AttributeError:
				1274	def handler(tag, attrib_in, event=event, append=append,
				1275	start=self._parser._start):
				1276	append((event, start(tag, attrib_in)))
				1277	parser.StartElementHandler = handler
				1278	elif event == "end":
				1279	def handler(tag, event=event, append=append,
				1280	end=self._parser._end):
				1281	append((event, end(tag)))
				1282	parser.EndElementHandler = handler
				1283	elif event == "start-ns":
				1284	def handler(prefix, uri, event=event, append=append):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1285	append((event, (prefix or "", uri or "")))
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1286	parser.StartNamespaceDeclHandler = handler
				1287	elif event == "end-ns":
				1288	def handler(prefix, event=event, append=append):
				1289	append((event, None))
				1290	parser.EndNamespaceDeclHandler = handler
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1291	else:
				1292	raise ValueError("unknown event %r" % event)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1293
Georg Brandl	a18af4e	2007-04-21 15:47:16 +0000	[diff] [blame]	1294	def __next__(self):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1295	while 1:
				1296	try:
				1297	item = self._events[self._index]
Florent Xicluna	91d5193	2011-11-01 23:31:09 +0100	[diff] [blame]	1298	self._index += 1
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1299	return item
Florent Xicluna	91d5193	2011-11-01 23:31:09 +0100	[diff] [blame]	1300	except IndexError:
				1301	pass
				1302	if self._error:
				1303	e = self._error
				1304	self._error = None
				1305	raise e
				1306	if self._parser is None:
				1307	self.root = self._root
				1308	if self._close_file:
				1309	self._file.close()
				1310	raise StopIteration
				1311	# load event buffer
				1312	del self._events[:]
				1313	self._index = 0
				1314	data = self._file.read(16384)
				1315	if data:
				1316	try:
				1317	self._parser.feed(data)
				1318	except SyntaxError as exc:
				1319	self._error = exc
				1320	else:
				1321	self._root = self._parser.close()
				1322	self._parser = None
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1323
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1324	def __iter__(self):
				1325	return self
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1326
				1327	##
				1328	# Parses an XML document from a string constant. This function can
				1329	# be used to embed "XML literals" in Python code.
				1330	#
# @param text A string containing XML data.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1332	# @param parser An optional parser instance. If not given, the
				1333	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1334	# @return An Element instance.
				1335	# @defreturn Element
				1336
def XML(text, parser=None):
    # use the standard parser unless the caller supplied one
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
				1342
				1343	##
				1344	# Parses an XML document from a string constant, and also returns
				1345	# a dictionary which maps from element id:s to elements.
				1346	#
				1347	# @param source A string containing XML data.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1348	# @param parser An optional parser instance. If not given, the
				1349	# standard {@link XMLParser} parser is used.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1350	# @return A tuple containing an Element instance and a dictionary.
				1351	# @defreturn (Element, dictionary)
				1352
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1353	def XMLID(text, parser=None):
				1354	if not parser:
				1355	parser = XMLParser(target=TreeBuilder())
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1356	parser.feed(text)
				1357	tree = parser.close()
				1358	ids = {}
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1359	for elem in tree.iter():
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1360	id = elem.get("id")
				1361	if id:
				1362	ids[id] = elem
				1363	return tree, ids
				1364
##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element

# Plain alias: fromstring is the very same function object as XML, so
# it accepts XML's optional parser argument as well.
fromstring = XML
				1374
				1375	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1376	# Parses an XML document from a sequence of string fragments.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1377	#
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1378	# @param sequence A list or other sequence containing XML data fragments.
				1379	# @param parser An optional parser instance. If not given, the
				1380	# standard {@link XMLParser} parser is used.
				1381	# @return An Element instance.
				1382	# @defreturn Element
				1383	# @since 1.3
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1384
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1385	def fromstringlist(sequence, parser=None):
				1386	if not parser:
				1387	parser = XMLParser(target=TreeBuilder())
				1388	for text in sequence:
				1389	parser.feed(text)
				1390	return parser.close()
				1391
				1392	# --------------------------------------------------------------------
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1393
				1394	##
				1395	# Generic element structure builder. This builder converts a sequence
				1396	# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
				1397	# #TreeBuilder.end} method calls to a well-formed element structure.
				1398	# <p>
				1399	# You can use this class to build an element structure using a custom XML
				1400	# parser, or a parser for some other XML-like format.
				1401	#
				1402	# @param element_factory Optional element factory. This factory
				1403	# is called to create new Element instances, as necessary.
				1404
				1405	class TreeBuilder:
				1406
				1407	def __init__(self, element_factory=None):
				1408	self._data = [] # data collector
				1409	self._elem = [] # element stack
				1410	self._last = None # last element
				1411	self._tail = None # true if we're after an end tag
				1412	if element_factory is None:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1413	element_factory = Element
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1414	self._factory = element_factory
				1415
				1416	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1417	# Flushes the builder buffers, and returns the toplevel document
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1418	# element.
				1419	#
				1420	# @return An Element instance.
				1421	# @defreturn Element
				1422
				1423	def close(self):
				1424	assert len(self._elem) == 0, "missing end tags"
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1425	assert self._last is not None, "missing toplevel element"
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1426	return self._last
				1427
				1428	def _flush(self):
				1429	if self._data:
				1430	if self._last is not None:
Neal Norwitz	9d72bb4	2007-04-17 08:48:32 +0000	[diff] [blame]	1431	text = "".join(self._data)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1432	if self._tail:
				1433	assert self._last.tail is None, "internal error (tail)"
				1434	self._last.tail = text
				1435	else:
				1436	assert self._last.text is None, "internal error (text)"
				1437	self._last.text = text
				1438	self._data = []
				1439
				1440	##
				1441	# Adds text to the current element.
				1442	#
				1443	# @param data A string. This should be either an 8-bit string
				1444	# containing ASCII text, or a Unicode string.
				1445
				1446	def data(self, data):
				1447	self._data.append(data)
				1448
				1449	##
				1450	# Opens a new element.
				1451	#
				1452	# @param tag The element name.
				1453	# @param attrib A dictionary containing element attributes.
				1454	# @return The opened element.
				1455	# @defreturn Element
				1456
				1457	def start(self, tag, attrs):
				1458	self._flush()
				1459	self._last = elem = self._factory(tag, attrs)
				1460	if self._elem:
				1461	self._elem[-1].append(elem)
				1462	self._elem.append(elem)
				1463	self._tail = 0
				1464	return elem
				1465
				1466	##
				1467	# Closes the current element.
				1468	#
				1469	# @param tag The element name.
				1470	# @return The closed element.
				1471	# @defreturn Element
				1472
				1473	def end(self, tag):
				1474	self._flush()
				1475	self._last = self._elem.pop()
				1476	assert self._last.tag == tag,\
				1477	"end tag mismatch (expected %s, got %s)" % (
				1478	self._last.tag, tag)
				1479	self._tail = 1
				1480	return self._last
				1481
				1482	##
				1483	# Element structure builder for XML source data, based on the
				1484	# <b>expat</b> parser.
				1485	#
				1486	# @keyparam target Target object. If omitted, the builder uses an
				1487	# instance of the standard {@link #TreeBuilder} class.
				1488	# @keyparam html Predefine HTML entities. This flag is not supported
				1489	# by the current implementation.
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1490	# @keyparam encoding Optional encoding. If given, the value overrides
				1491	# the encoding specified in the XML file.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1492	# @see #ElementTree
				1493	# @see #TreeBuilder
				1494
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1495	class XMLParser:
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1496
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1497	def __init__(self, html=0, target=None, encoding=None):
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1498	try:
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1499	from xml.parsers import expat
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1500	except ImportError:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1501	try:
				1502	import pyexpat as expat
				1503	except ImportError:
				1504	raise ImportError(
				1505	"No module named expat; use SimpleXMLTreeBuilder instead"
				1506	)
				1507	parser = expat.ParserCreate(encoding, "}")
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1508	if target is None:
				1509	target = TreeBuilder()
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1510	# underscored names are provided for compatibility only
				1511	self.parser = self._parser = parser
				1512	self.target = self._target = target
				1513	self._error = expat.error
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1514	self._names = {} # name memo cache
				1515	# callbacks
				1516	parser.DefaultHandlerExpand = self._default
				1517	parser.StartElementHandler = self._start
				1518	parser.EndElementHandler = self._end
				1519	parser.CharacterDataHandler = self._data
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1520	# optional callbacks
				1521	parser.CommentHandler = self._comment
				1522	parser.ProcessingInstructionHandler = self._pi
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1523	# let expat do the buffering, if supported
				1524	try:
				1525	self._parser.buffer_text = 1
				1526	except AttributeError:
				1527	pass
				1528	# use new-style attribute handling, if supported
				1529	try:
				1530	self._parser.ordered_attributes = 1
				1531	self._parser.specified_attributes = 1
				1532	parser.StartElementHandler = self._start_list
				1533	except AttributeError:
				1534	pass
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1535	self._doctype = None
				1536	self.entity = {}
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1537	try:
				1538	self.version = "Expat %d.%d.%d" % expat.version_info
				1539	except AttributeError:
				1540	pass # unknown
				1541
				1542	def _raiseerror(self, value):
				1543	err = ParseError(value)
				1544	err.code = value.code
				1545	err.position = value.lineno, value.offset
				1546	raise err
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1547
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1548	def _fixname(self, key):
				1549	# expand qname, and convert name string to ascii, if possible
				1550	try:
				1551	name = self._names[key]
				1552	except KeyError:
				1553	name = key
				1554	if "}" in name:
				1555	name = "{" + name
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1556	self._names[key] = name
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1557	return name
				1558
				1559	def _start(self, tag, attrib_in):
				1560	fixname = self._fixname
				1561	tag = fixname(tag)
				1562	attrib = {}
				1563	for key, value in attrib_in.items():
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1564	attrib[fixname(key)] = value
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1565	return self.target.start(tag, attrib)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1566
				1567	def _start_list(self, tag, attrib_in):
				1568	fixname = self._fixname
				1569	tag = fixname(tag)
				1570	attrib = {}
				1571	if attrib_in:
				1572	for i in range(0, len(attrib_in), 2):
Martin v. Löwis	f30bb0e	2007-07-28 11:40:46 +0000	[diff] [blame]	1573	attrib[fixname(attrib_in[i])] = attrib_in[i+1]
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1574	return self.target.start(tag, attrib)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1575
				1576	def _data(self, text):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1577	return self.target.data(text)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1578
				1579	def _end(self, tag):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1580	return self.target.end(self._fixname(tag))
				1581
				1582	def _comment(self, data):
				1583	try:
				1584	comment = self.target.comment
				1585	except AttributeError:
				1586	pass
				1587	else:
				1588	return comment(data)
				1589
				1590	def _pi(self, target, data):
				1591	try:
				1592	pi = self.target.pi
				1593	except AttributeError:
				1594	pass
				1595	else:
				1596	return pi(target, data)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1597
				1598	def _default(self, text):
				1599	prefix = text[:1]
				1600	if prefix == "&":
				1601	# deal with undefined entities
				1602	try:
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1603	self.target.data(self.entity[text[1:-1]])
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1604	except KeyError:
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1605	from xml.parsers import expat
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1606	err = expat.error(
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1607	"undefined entity %s: line %d, column %d" %
				1608	(text, self._parser.ErrorLineNumber,
				1609	self._parser.ErrorColumnNumber)
				1610	)
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1611	err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
				1612	err.lineno = self._parser.ErrorLineNumber
				1613	err.offset = self._parser.ErrorColumnNumber
				1614	raise err
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1615	elif prefix == "<" and text[:9] == "<!DOCTYPE":
				1616	self._doctype = [] # inside a doctype declaration
				1617	elif self._doctype is not None:
				1618	# parse doctype contents
				1619	if prefix == ">":
				1620	self._doctype = None
				1621	return
Neal Norwitz	9d72bb4	2007-04-17 08:48:32 +0000	[diff] [blame]	1622	text = text.strip()
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1623	if not text:
				1624	return
				1625	self._doctype.append(text)
				1626	n = len(self._doctype)
				1627	if n > 2:
				1628	type = self._doctype[1]
				1629	if type == "PUBLIC" and n == 4:
				1630	name, type, pubid, system = self._doctype
				1631	elif type == "SYSTEM" and n == 3:
				1632	name, type, system = self._doctype
				1633	pubid = None
				1634	else:
				1635	return
				1636	if pubid:
				1637	pubid = pubid[1:-1]
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1638	if hasattr(self.target, "doctype"):
				1639	self.target.doctype(name, pubid, system[1:-1])
				1640	elif self.doctype is not self._XMLParser__doctype:
				1641	# warn about deprecated call
				1642	self._XMLParser__doctype(name, pubid, system[1:-1])
				1643	self.doctype(name, pubid, system[1:-1])
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1644	self._doctype = None
				1645
				1646	##
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1647	# (Deprecated) Handles a doctype declaration.
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1648	#
				1649	# @param name Doctype name.
				1650	# @param pubid Public identifier.
				1651	# @param system System identifier.
				1652
				1653	def doctype(self, name, pubid, system):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1654	"""This method of XMLParser is deprecated."""
				1655	warnings.warn(
				1656	"This method of XMLParser is deprecated. Define doctype() "
				1657	"method on the TreeBuilder target.",
				1658	DeprecationWarning,
				1659	)
				1660
				1661	# sentinel, if doctype is redefined in a subclass
				1662	__doctype = doctype
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1663
				1664	##
				1665	# Feeds data to the parser.
				1666	#
				1667	# @param data Encoded data.
				1668
				1669	def feed(self, data):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1670	try:
				1671	self._parser.Parse(data, 0)
				1672	except self._error as v:
				1673	self._raiseerror(v)
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1674
				1675	##
				1676	# Finishes feeding data to the parser.
				1677	#
				1678	# @return An element structure.
				1679	# @defreturn Element
				1680
				1681	def close(self):
Florent Xicluna	f15351d	2010-03-13 23:24:31 +0000	[diff] [blame]	1682	try:
				1683	self._parser.Parse("", 1) # end of data
				1684	except self._error as v:
				1685	self._raiseerror(v)
				1686	tree = self.target.close()
				1687	del self.target, self._parser # get rid of circular references
Armin Rigo	9ed7306	2005-12-14 18:10:45 +0000	[diff] [blame]	1688	return tree
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1689
Florent Xicluna	a72a98f	2012-02-13 11:03:30 +0100	[diff] [blame]	1690
				1691	# Import the C accelerators
				1692	try:
				1693	# Element, SubElement, ParseError, TreeBuilder, XMLParser
				1694	from _elementtree import *
				1695	except ImportError:
				1696	pass
				1697	else:
				1698	# Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser
				1699
				1700	class ElementTree(ElementTree):
				1701	def parse(self, source, parser=None):
				1702	close_source = False
				1703	if not hasattr(source, 'read'):
				1704	source = open(source, 'rb')
				1705	close_source = True
				1706	try:
				1707	if parser is not None:
				1708	while True:
				1709	data = source.read(65536)
				1710	if not data:
				1711	break
				1712	parser.feed(data)
				1713	self._root = parser.close()
				1714	else:
				1715	parser = XMLParser()
				1716	self._root = parser._parse(source)
				1717	return self._root
				1718	finally:
				1719	if close_source:
				1720	source.close()
				1721
				1722	class iterparse:
				1723	root = None
				1724	def __init__(self, file, events=None):
				1725	self._close_file = False
				1726	if not hasattr(file, 'read'):
				1727	file = open(file, 'rb')
				1728	self._close_file = True
				1729	self._file = file
				1730	self._events = []
				1731	self._index = 0
				1732	self._error = None
				1733	self.root = self._root = None
				1734	b = TreeBuilder()
				1735	self._parser = XMLParser(b)
				1736	self._parser._setevents(self._events, events)
				1737
				1738	def __next__(self):
				1739	while True:
				1740	try:
				1741	item = self._events[self._index]
				1742	self._index += 1
				1743	return item
				1744	except IndexError:
				1745	pass
				1746	if self._error:
				1747	e = self._error
				1748	self._error = None
				1749	raise e
				1750	if self._parser is None:
				1751	self.root = self._root
				1752	if self._close_file:
				1753	self._file.close()
				1754	raise StopIteration
				1755	# load event buffer
				1756	del self._events[:]
				1757	self._index = 0
				1758	data = self._file.read(16384)
				1759	if data:
				1760	try:
				1761	self._parser.feed(data)
				1762	except SyntaxError as exc:
				1763	self._error = exc
				1764	else:
				1765	self._root = self._parser.close()
				1766	self._parser = None
				1767
				1768	def __iter__(self):
				1769	return self
				1770
# compatibility
# XMLTreeBuilder is kept as a second name for the XMLParser class.
XMLTreeBuilder = XMLParser

# workaround circular import.
# The import is attempted only here, at the end of the module.
# NOTE(review): presumably ElementC14N imports this module in turn --
# confirm.  If ElementC14N cannot be imported, no "c14n" entry is
# added to _serialize.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass