Blame - Lib/xml/etree/ElementTree.py - platform/external/python/cpython2

blob: bd452515979f62b4450ce2373108d61a692d66cc [file] [log] [blame]

Fredrik Lundh	075854f	2005-12-12 15:10:44 +0000	[diff] [blame]	1	#
				2	# ElementTree
				3	# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
				4	#
				5	# light-weight XML support for Python 1.5.2 and later.
				6	#
				7	# history:
				8	# 2001-10-20 fl created (from various sources)
				9	# 2001-11-01 fl return root from parse method
				10	# 2002-02-16 fl sort attributes in lexical order
				11	# 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup
				12	# 2002-05-01 fl finished TreeBuilder refactoring
				13	# 2002-07-14 fl added basic namespace support to ElementTree.write
				14	# 2002-07-25 fl added QName attribute support
				15	# 2002-10-20 fl fixed encoding in write
				16	# 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding
				17	# 2002-11-27 fl accept file objects or file names for parse/write
				18	# 2002-12-04 fl moved XMLTreeBuilder back to this module
				19	# 2003-01-11 fl fixed entity encoding glitch for us-ascii
				20	# 2003-02-13 fl added XML literal factory
				21	# 2003-02-21 fl added ProcessingInstruction/PI factory
				22	# 2003-05-11 fl added tostring/fromstring helpers
				23	# 2003-05-26 fl added ElementPath support
				24	# 2003-07-05 fl added makeelement factory method
				25	# 2003-07-28 fl added more well-known namespace prefixes
				26	# 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch)
				27	# 2003-09-04 fl fall back on emulator if ElementPath is not installed
				28	# 2003-10-31 fl markup updates
				29	# 2003-11-15 fl fixed nested namespace bug
				30	# 2004-03-28 fl added XMLID helper
				31	# 2004-06-02 fl added default support to findtext
				32	# 2004-06-08 fl fixed encoding of non-ascii element/attribute names
				33	# 2004-08-23 fl take advantage of post-2.1 expat features
				34	# 2005-02-01 fl added iterparse implementation
				35	# 2005-03-02 fl fixed iterparse support for pre-2.2 versions
				36	#
				37	# Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved.
				38	#
				39	# fredrik@pythonware.com
				40	# http://www.pythonware.com
				41	#
				42	# --------------------------------------------------------------------
				43	# The ElementTree toolkit is
				44	#
				45	# Copyright (c) 1999-2005 by Fredrik Lundh
				46	#
				47	# By obtaining, using, and/or copying this software and/or its
				48	# associated documentation, you agree that you have read, understood,
				49	# and will comply with the following terms and conditions:
				50	#
				51	# Permission to use, copy, modify, and distribute this software and
				52	# its associated documentation for any purpose and without fee is
				53	# hereby granted, provided that the above copyright notice appears in
				54	# all copies, and that both that copyright notice and this permission
				55	# notice appear in supporting documentation, and that the name of
				56	# Secret Labs AB or the author not be used in advertising or publicity
				57	# pertaining to distribution of the software without specific, written
				58	# prior permission.
				59	#
				60	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
				61	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
				62	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
				63	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
				64	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
				65	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
				66	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
				67	# OF THIS SOFTWARE.
				68	# --------------------------------------------------------------------
				69
# Names made available by a star-import of this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring",
    "iselement", "iterparse",
    "parse",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "TreeBuilder",
    "VERSION", "XML",
    "XMLTreeBuilder",
    ]
				86
				87	##
				88	# The <b>Element</b> type is a flexible container object, designed to
				89	# store hierarchical data structures in memory. The type can be
				90	# described as a cross between a list and a dictionary.
				91	# <p>
				92	# Each element has a number of properties associated with it:
				93	# <ul>
				94	# <li>a <i>tag</i>. This is a string identifying what kind of data
				95	# this element represents (the element type, in other words).</li>
				96	# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
				97	# <li>a <i>text</i> string.</li>
				98	# <li>an optional <i>tail</i> string.</li>
				99	# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
				100	# </ul>
				101	#
				102	# To create an element instance, use the {@link #Element} or {@link
				103	# #SubElement} factory functions.
				104	# <p>
				105	# The {@link #ElementTree} class can be used to wrap an element
				106	# structure, and convert it from and to XML.
				107	##
				108
				109	import string, sys, re
				110
				111	class _SimpleElementPath:
				112	# emulate pre-1.2 find/findtext/findall behaviour
				113	def find(self, element, tag):
				114	for elem in element:
				115	if elem.tag == tag:
				116	return elem
				117	return None
				118	def findtext(self, element, tag, default=None):
				119	for elem in element:
				120	if elem.tag == tag:
				121	return elem.text or ""
				122	return default
				123	def findall(self, element, tag):
				124	if tag[:3] == ".//":
				125	return element.getiterator(tag[3:])
				126	result = []
				127	for elem in element:
				128	if elem.tag == tag:
				129	result.append(elem)
				130	return result
				131
				132	try:
				133	import ElementPath
				134	except ImportError:
				135	# FIXME: issue warning in this case?
				136	ElementPath = _SimpleElementPath()
				137
				138	# TODO: add support for custom namespace resolvers/default namespaces
				139	# TODO: add improved support for incremental parsing
				140
# Version string; exported as part of the public API (listed in __all__).
VERSION = "1.2.6"
				142
				143	##
				144	# Internal element class. This class defines the Element interface,
				145	# and provides a reference implementation of this interface.
				146	# <p>
				147	# You should not create instances of this class directly. Use the
				148	# appropriate factory functions instead, such as {@link #Element}
				149	# and {@link #SubElement}.
				150	#
				151	# @see Element
				152	# @see SubElement
				153	# @see Comment
				154	# @see ProcessingInstruction
				155
				156	class _ElementInterface:
				157	# <tag attrib>text<child/>...</tag>tail
				158
				159	##
				160	# (Attribute) Element tag.
				161
				162	tag = None
				163
				164	##
				165	# (Attribute) Element attribute dictionary. Where possible, use
				166	# {@link #_ElementInterface.get},
				167	# {@link #_ElementInterface.set},
				168	# {@link #_ElementInterface.keys}, and
				169	# {@link #_ElementInterface.items} to access
				170	# element attributes.
				171
				172	attrib = None
				173
				174	##
				175	# (Attribute) Text before first subelement. This is either a
				176	# string or the value None, if there was no text.
				177
				178	text = None
				179
				180	##
				181	# (Attribute) Text after this element's end tag, but before the
				182	# next sibling element's start tag. This is either a string or
				183	# the value None, if there was no text.
				184
				185	tail = None # text after end tag, if any
				186
				187	def __init__(self, tag, attrib):
				188	self.tag = tag
				189	self.attrib = attrib
				190	self._children = []
				191
				192	def __repr__(self):
				193	return "<Element %s at %x>" % (self.tag, id(self))
				194
				195	##
				196	# Creates a new element object of the same type as this element.
				197	#
				198	# @param tag Element tag.
				199	# @param attrib Element attributes, given as a dictionary.
				200	# @return A new element instance.
				201
				202	def makeelement(self, tag, attrib):
				203	return Element(tag, attrib)
				204
				205	##
				206	# Returns the number of subelements.
				207	#
				208	# @return The number of subelements.
				209
				210	def __len__(self):
				211	return len(self._children)
				212
				213	##
				214	# Returns the given subelement.
				215	#
				216	# @param index What subelement to return.
				217	# @return The given subelement.
				218	# @exception IndexError If the given element does not exist.
				219
				220	def __getitem__(self, index):
				221	return self._children[index]
				222
				223	##
				224	# Replaces the given subelement.
				225	#
				226	# @param index What subelement to replace.
				227	# @param element The new element value.
				228	# @exception IndexError If the given element does not exist.
				229	# @exception AssertionError If element is not a valid object.
				230
				231	def __setitem__(self, index, element):
				232	assert iselement(element)
				233	self._children[index] = element
				234
				235	##
				236	# Deletes the given subelement.
				237	#
				238	# @param index What subelement to delete.
				239	# @exception IndexError If the given element does not exist.
				240
				241	def __delitem__(self, index):
				242	del self._children[index]
				243
				244	##
				245	# Returns a list containing subelements in the given range.
				246	#
				247	# @param start The first subelement to return.
				248	# @param stop The first subelement that shouldn't be returned.
				249	# @return A sequence object containing subelements.
				250
				251	def __getslice__(self, start, stop):
				252	return self._children[start:stop]
				253
				254	##
				255	# Replaces a number of subelements with elements from a sequence.
				256	#
				257	# @param start The first subelement to replace.
				258	# @param stop The first subelement that shouldn't be replaced.
				259	# @param elements A sequence object with zero or more elements.
				260	# @exception AssertionError If a sequence member is not a valid object.
				261
				262	def __setslice__(self, start, stop, elements):
				263	for element in elements:
				264	assert iselement(element)
				265	self._children[start:stop] = list(elements)
				266
				267	##
				268	# Deletes a number of subelements.
				269	#
				270	# @param start The first subelement to delete.
				271	# @param stop The first subelement to leave in there.
				272
				273	def __delslice__(self, start, stop):
				274	del self._children[start:stop]
				275
				276	##
				277	# Adds a subelement to the end of this element.
				278	#
				279	# @param element The element to add.
				280	# @exception AssertionError If a sequence member is not a valid object.
				281
				282	def append(self, element):
				283	assert iselement(element)
				284	self._children.append(element)
				285
				286	##
				287	# Inserts a subelement at the given position in this element.
				288	#
				289	# @param index Where to insert the new subelement.
				290	# @exception AssertionError If the element is not a valid object.
				291
				292	def insert(self, index, element):
				293	assert iselement(element)
				294	self._children.insert(index, element)
				295
				296	##
				297	# Removes a matching subelement. Unlike the <b>find</b> methods,
				298	# this method compares elements based on identity, not on tag
				299	# value or contents.
				300	#
				301	# @param element What element to remove.
				302	# @exception ValueError If a matching element could not be found.
				303	# @exception AssertionError If the element is not a valid object.
				304
				305	def remove(self, element):
				306	assert iselement(element)
				307	self._children.remove(element)
				308
				309	##
				310	# Returns all subelements. The elements are returned in document
				311	# order.
				312	#
				313	# @return A list of subelements.
				314	# @defreturn list of Element instances
				315
				316	def getchildren(self):
				317	return self._children
				318
				319	##
				320	# Finds the first matching subelement, by tag name or path.
				321	#
				322	# @param path What element to look for.
				323	# @return The first matching element, or None if no element was found.
				324	# @defreturn Element or None
				325
				326	def find(self, path):
				327	return ElementPath.find(self, path)
				328
				329	##
				330	# Finds text for the first matching subelement, by tag name or path.
				331	#
				332	# @param path What element to look for.
				333	# @param default What to return if the element was not found.
				334	# @return The text content of the first matching element, or the
				335	# default value no element was found. Note that if the element
				336	# has is found, but has no text content, this method returns an
				337	# empty string.
				338	# @defreturn string
				339
				340	def findtext(self, path, default=None):
				341	return ElementPath.findtext(self, path, default)
				342
				343	##
				344	# Finds all matching subelements, by tag name or path.
				345	#
				346	# @param path What element to look for.
				347	# @return A list or iterator containing all matching elements,
				348	# in document order.
				349	# @defreturn list of Element instances
				350
				351	def findall(self, path):
				352	return ElementPath.findall(self, path)
				353
				354	##
				355	# Resets an element. This function removes all subelements, clears
				356	# all attributes, and sets the text and tail attributes to None.
				357
				358	def clear(self):
				359	self.attrib.clear()
				360	self._children = []
				361	self.text = self.tail = None
				362
				363	##
				364	# Gets an element attribute.
				365	#
				366	# @param key What attribute to look for.
				367	# @param default What to return if the attribute was not found.
				368	# @return The attribute value, or the default value, if the
				369	# attribute was not found.
				370	# @defreturn string or None
				371
				372	def get(self, key, default=None):
				373	return self.attrib.get(key, default)
				374
				375	##
				376	# Sets an element attribute.
				377	#
				378	# @param key What attribute to set.
				379	# @param value The attribute value.
				380
				381	def set(self, key, value):
				382	self.attrib[key] = value
				383
				384	##
				385	# Gets a list of attribute names. The names are returned in an
				386	# arbitrary order (just like for an ordinary Python dictionary).
				387	#
				388	# @return A list of element attribute names.
				389	# @defreturn list of strings
				390
				391	def keys(self):
				392	return self.attrib.keys()
				393
				394	##
				395	# Gets element attributes, as a sequence. The attributes are
				396	# returned in an arbitrary order.
				397	#
				398	# @return A list of (name, value) tuples for all attributes.
				399	# @defreturn list of (string, string) tuples
				400
				401	def items(self):
				402	return self.attrib.items()
				403
				404	##
				405	# Creates a tree iterator. The iterator loops over this element
				406	# and all subelements, in document order, and returns all elements
				407	# with a matching tag.
				408	# <p>
				409	# If the tree structure is modified during iteration, the result
				410	# is undefined.
				411	#
				412	# @param tag What tags to look for (default is to return all elements).
				413	# @return A list or iterator containing all the matching elements.
				414	# @defreturn list or iterator
				415
				416	def getiterator(self, tag=None):
				417	nodes = []
				418	if tag == "*":
				419	tag = None
				420	if tag is None or self.tag == tag:
				421	nodes.append(self)
				422	for node in self._children:
				423	nodes.extend(node.getiterator(tag))
				424	return nodes
				425
				426	# compatibility
				427	_Element = _ElementInterface
				428
				429	##
				430	# Element factory. This function returns an object implementing the
				431	# standard Element interface. The exact class or type of that object
				432	# is implementation dependent, but it will always be compatible with
				433	# the {@link #_ElementInterface} class in this module.
				434	# <p>
				435	# The element name, attribute names, and attribute values can be
				436	# either 8-bit ASCII strings or Unicode strings.
				437	#
				438	# @param tag The element name.
				439	# @param attrib An optional dictionary, containing element attributes.
				440	# @param **extra Additional attributes, given as keyword arguments.
				441	# @return An element instance.
				442	# @defreturn Element
				443
				444	def Element(tag, attrib={}, **extra):
				445	attrib = attrib.copy()
				446	attrib.update(extra)
				447	return _ElementInterface(tag, attrib)
				448
				449	##
				450	# Subelement factory. This function creates an element instance, and
				451	# appends it to an existing element.
				452	# <p>
				453	# The element name, attribute names, and attribute values can be
				454	# either 8-bit ASCII strings or Unicode strings.
				455	#
				456	# @param parent The parent element.
				457	# @param tag The subelement name.
				458	# @param attrib An optional dictionary, containing element attributes.
				459	# @param **extra Additional attributes, given as keyword arguments.
				460	# @return An element instance.
				461	# @defreturn Element
				462
				463	def SubElement(parent, tag, attrib={}, **extra):
				464	attrib = attrib.copy()
				465	attrib.update(extra)
				466	element = parent.makeelement(tag, attrib)
				467	parent.append(element)
				468	return element
				469
				470	##
				471	# Comment element factory. This factory function creates a special
				472	# element that will be serialized as an XML comment.
				473	# <p>
				474	# The comment string can be either an 8-bit ASCII string or a Unicode
				475	# string.
				476	#
				477	# @param text A string containing the comment string.
				478	# @return An element instance, representing a comment.
				479	# @defreturn Element
				480
				481	def Comment(text=None):
				482	element = Element(Comment)
				483	element.text = text
				484	return element
				485
				486	##
				487	# PI element factory. This factory function creates a special element
				488	# that will be serialized as an XML processing instruction.
				489	#
				490	# @param target A string containing the PI target.
				491	# @param text A string containing the PI contents, if any.
				492	# @return An element instance, representing a PI.
				493	# @defreturn Element
				494
				495	def ProcessingInstruction(target, text=None):
				496	element = Element(ProcessingInstruction)
				497	element.text = target
				498	if text:
				499	element.text = element.text + " " + text
				500	return element
				501
				502	PI = ProcessingInstruction
				503
				504	##
				505	# QName wrapper. This can be used to wrap a QName attribute value, in
				506	# order to get proper namespace handling on output.
				507	#
				508	# @param text A string containing the QName value, in the form {uri}local,
				509	# or, if the tag argument is given, the URI part of a QName.
				510	# @param tag Optional tag. If given, the first argument is interpreted as
				511	# an URI, and this argument is interpreted as a local name.
				512	# @return An opaque object, representing the QName.
				513
				514	class QName:
				515	def __init__(self, text_or_uri, tag=None):
				516	if tag:
				517	text_or_uri = "{%s}%s" % (text_or_uri, tag)
				518	self.text = text_or_uri
				519	def __str__(self):
				520	return self.text
				521	def __hash__(self):
				522	return hash(self.text)
				523	def __cmp__(self, other):
				524	if isinstance(other, QName):
				525	return cmp(self.text, other.text)
				526	return cmp(self.text, other)
				527
				528	##
				529	# ElementTree wrapper class. This class represents an entire element
				530	# hierarchy, and adds some extra support for serialization to and from
				531	# standard XML.
				532	#
				533	# @param element Optional root element.
				534	# @keyparam file Optional file handle or name. If given, the
				535	# tree is initialized with the contents of this XML file.
				536
				537	class ElementTree:
				538
				539	def __init__(self, element=None, file=None):
				540	assert element is None or iselement(element)
				541	self._root = element # first node
				542	if file:
				543	self.parse(file)
				544
				545	##
				546	# Gets the root element for this tree.
				547	#
				548	# @return An element instance.
				549	# @defreturn Element
				550
				551	def getroot(self):
				552	return self._root
				553
				554	##
				555	# Replaces the root element for this tree. This discards the
				556	# current contents of the tree, and replaces it with the given
				557	# element. Use with care.
				558	#
				559	# @param element An element instance.
				560
				561	def _setroot(self, element):
				562	assert iselement(element)
				563	self._root = element
				564
				565	##
				566	# Loads an external XML document into this element tree.
				567	#
				568	# @param source A file name or file object.
				569	# @param parser An optional parser instance. If not given, the
				570	# standard {@link XMLTreeBuilder} parser is used.
				571	# @return The document root element.
				572	# @defreturn Element
				573
				574	def parse(self, source, parser=None):
				575	if not hasattr(source, "read"):
				576	source = open(source, "rb")
				577	if not parser:
				578	parser = XMLTreeBuilder()
				579	while 1:
				580	data = source.read(32768)
				581	if not data:
				582	break
				583	parser.feed(data)
				584	self._root = parser.close()
				585	return self._root
				586
				587	##
				588	# Creates a tree iterator for the root element. The iterator loops
				589	# over all elements in this tree, in document order.
				590	#
				591	# @param tag What tags to look for (default is to return all elements)
				592	# @return An iterator.
				593	# @defreturn iterator
				594
				595	def getiterator(self, tag=None):
				596	assert self._root is not None
				597	return self._root.getiterator(tag)
				598
				599	##
				600	# Finds the first toplevel element with given tag.
				601	# Same as getroot().find(path).
				602	#
				603	# @param path What element to look for.
				604	# @return The first matching element, or None if no element was found.
				605	# @defreturn Element or None
				606
				607	def find(self, path):
				608	assert self._root is not None
				609	if path[:1] == "/":
				610	path = "." + path
				611	return self._root.find(path)
				612
				613	##
				614	# Finds the element text for the first toplevel element with given
				615	# tag. Same as getroot().findtext(path).
				616	#
				617	# @param path What toplevel element to look for.
				618	# @param default What to return if the element was not found.
				619	# @return The text content of the first matching element, or the
				620	# default value no element was found. Note that if the element
				621	# has is found, but has no text content, this method returns an
				622	# empty string.
				623	# @defreturn string
				624
				625	def findtext(self, path, default=None):
				626	assert self._root is not None
				627	if path[:1] == "/":
				628	path = "." + path
				629	return self._root.findtext(path, default)
				630
				631	##
				632	# Finds all toplevel elements with the given tag.
				633	# Same as getroot().findall(path).
				634	#
				635	# @param path What element to look for.
				636	# @return A list or iterator containing all matching elements,
				637	# in document order.
				638	# @defreturn list of Element instances
				639
				640	def findall(self, path):
				641	assert self._root is not None
				642	if path[:1] == "/":
				643	path = "." + path
				644	return self._root.findall(path)
				645
				646	##
				647	# Writes the element tree to a file, as XML.
				648	#
				649	# @param file A file name, or a file object opened for writing.
				650	# @param encoding Optional output encoding (default is US-ASCII).
				651
				652	def write(self, file, encoding="us-ascii"):
				653	assert self._root is not None
				654	if not hasattr(file, "write"):
				655	file = open(file, "wb")
				656	if not encoding:
				657	encoding = "us-ascii"
				658	elif encoding != "utf-8" and encoding != "us-ascii":
				659	file.write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
				660	self._write(file, self._root, encoding, {})
				661
				662	def _write(self, file, node, encoding, namespaces):
				663	# write XML to file
				664	tag = node.tag
				665	if tag is Comment:
				666	file.write("<!-- %s -->" % _escape_cdata(node.text, encoding))
				667	elif tag is ProcessingInstruction:
				668	file.write("<?%s?>" % _escape_cdata(node.text, encoding))
				669	else:
				670	items = node.items()
				671	xmlns_items = [] # new namespaces in this scope
				672	try:
				673	if isinstance(tag, QName) or tag[:1] == "{":
				674	tag, xmlns = fixtag(tag, namespaces)
				675	if xmlns: xmlns_items.append(xmlns)
				676	except TypeError:
				677	_raise_serialization_error(tag)
				678	file.write("<" + _encode(tag, encoding))
				679	if items or xmlns_items:
				680	items.sort() # lexical order
				681	for k, v in items:
				682	try:
				683	if isinstance(k, QName) or k[:1] == "{":
				684	k, xmlns = fixtag(k, namespaces)
				685	if xmlns: xmlns_items.append(xmlns)
				686	except TypeError:
				687	_raise_serialization_error(k)
				688	try:
				689	if isinstance(v, QName):
				690	v, xmlns = fixtag(v, namespaces)
				691	if xmlns: xmlns_items.append(xmlns)
				692	except TypeError:
				693	_raise_serialization_error(v)
				694	file.write(" %s=\"%s\"" % (_encode(k, encoding),
				695	_escape_attrib(v, encoding)))
				696	for k, v in xmlns_items:
				697	file.write(" %s=\"%s\"" % (_encode(k, encoding),
				698	_escape_attrib(v, encoding)))
				699	if node.text or len(node):
				700	file.write(">")
				701	if node.text:
				702	file.write(_escape_cdata(node.text, encoding))
				703	for n in node:
				704	self._write(file, n, encoding, namespaces)
				705	file.write("</" + _encode(tag, encoding) + ">")
				706	else:
				707	file.write(" />")
				708	for k, v in xmlns_items:
				709	del namespaces[v]
				710	if node.tail:
				711	file.write(_escape_cdata(node.tail, encoding))
				712
				713	# --------------------------------------------------------------------
				714	# helpers
				715
				716	##
				717	# Checks if an object appears to be a valid element object.
				718	#
				719	# @param element An element instance.
				720	# @return A true value if this is an element object.
				721	# @defreturn flag
				722
				723	def iselement(element):
				724	# FIXME: not sure about this; might be a better idea to look
				725	# for tag/attrib/text attributes
				726	return isinstance(element, _ElementInterface) or hasattr(element, "tag")
				727
				728	##
				729	# Writes an element tree or element structure to sys.stdout. This
				730	# function should be used for debugging only.
				731	# <p>
				732	# The exact output format is implementation dependent. In this
				733	# version, it's written as an ordinary XML file.
				734	#
				735	# @param elem An element tree or an individual element.
				736
				737	def dump(elem):
				738	# debugging
				739	if not isinstance(elem, ElementTree):
				740	elem = ElementTree(elem)
				741	elem.write(sys.stdout)
				742	tail = elem.getroot().tail
				743	if not tail or tail[-1] != "\n":
				744	sys.stdout.write("\n")
				745
				746	def _encode(s, encoding):
				747	try:
				748	return s.encode(encoding)
				749	except AttributeError:
				750	return s # 1.5.2: assume the string uses the right encoding
				751
				752	if sys.version[:3] == "1.5":
				753	_escape = re.compile(r"[&<>\"\x80-\xff]+") # 1.5.2
				754	else:
				755	_escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]+"'))
				756
				757	_escape_map = {
				758	"&": "&",
				759	"<": "<",
				760	">": ">",
				761	'"': """,
				762	}
				763
				764	_namespace_map = {
				765	# "well-known" namespace prefixes
				766	"http://www.w3.org/XML/1998/namespace": "xml",
				767	"http://www.w3.org/1999/xhtml": "html",
				768	"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
				769	"http://schemas.xmlsoap.org/wsdl/": "wsdl",
				770	}
				771
				772	def _raise_serialization_error(text):
				773	raise TypeError(
				774	"cannot serialize %r (type %s)" % (text, type(text).__name__)
				775	)
				776
				777	def _encode_entity(text, pattern=_escape):
				778	# map reserved and non-ascii characters to numerical entities
				779	def escape_entities(m, map=_escape_map):
				780	out = []
				781	append = out.append
				782	for char in m.group():
				783	text = map.get(char)
				784	if text is None:
				785	text = "&#%d;" % ord(char)
				786	append(text)
				787	return string.join(out, "")
				788	try:
				789	return _encode(pattern.sub(escape_entities, text), "ascii")
				790	except TypeError:
				791	_raise_serialization_error(text)
				792
				793	#
				794	# the following functions assume an ascii-compatible encoding
				795	# (or "utf-16")
				796
				797	def _escape_cdata(text, encoding=None, replace=string.replace):
				798	# escape character data
				799	try:
				800	if encoding:
				801	try:
				802	text = _encode(text, encoding)
				803	except UnicodeError:
				804	return _encode_entity(text)
				805	text = replace(text, "&", "&")
				806	text = replace(text, "<", "<")
				807	text = replace(text, ">", ">")
				808	return text
				809	except (TypeError, AttributeError):
				810	_raise_serialization_error(text)
				811
				812	def _escape_attrib(text, encoding=None, replace=string.replace):
				813	# escape attribute value
				814	try:
				815	if encoding:
				816	try:
				817	text = _encode(text, encoding)
				818	except UnicodeError:
				819	return _encode_entity(text)
				820	text = replace(text, "&", "&")
				821	text = replace(text, "'", "'") # FIXME: overkill
				822	text = replace(text, "\"", """)
				823	text = replace(text, "<", "<")
				824	text = replace(text, ">", ">")
				825	return text
				826	except (TypeError, AttributeError):
				827	_raise_serialization_error(text)
				828
				829	def fixtag(tag, namespaces):
				830	# given a decorated tag (of the form {uri}tag), return prefixed
				831	# tag and namespace declaration, if any
				832	if isinstance(tag, QName):
				833	tag = tag.text
				834	namespace_uri, tag = string.split(tag[1:], "}", 1)
				835	prefix = namespaces.get(namespace_uri)
				836	if prefix is None:
				837	prefix = _namespace_map.get(namespace_uri)
				838	if prefix is None:
				839	prefix = "ns%d" % len(namespaces)
				840	namespaces[namespace_uri] = prefix
				841	if prefix == "xml":
				842	xmlns = None
				843	else:
				844	xmlns = ("xmlns:%s" % prefix, namespace_uri)
				845	else:
				846	xmlns = None
				847	return "%s:%s" % (prefix, tag), xmlns
				848
				849	##
				850	# Parses an XML document into an element tree.
				851	#
				852	# @param source A filename or file object containing XML data.
				853	# @param parser An optional parser instance. If not given, the
				854	# standard {@link XMLTreeBuilder} parser is used.
				855	# @return An ElementTree instance
				856
				857	def parse(source, parser=None):
				858	tree = ElementTree()
				859	tree.parse(source, parser)
				860	return tree
				861
				862	##
				863	# Parses an XML document into an element tree incrementally, and reports
				864	# what's going on to the user.
				865	#
				866	# @param source A filename or file object containing XML data.
				867	# @param events A list of events to report back. If omitted, only "end"
				868	# events are reported.
				869	# @return A (event, elem) iterator.
				870
				871	class iterparse:
				872
    def __init__(self, source, events=None):
        # Sets up incremental parsing of *source* (a file name or an object
        # with a read() method) and installs one handler on the underlying
        # parser per requested event.  Each handler appends an
        # (event, payload) tuple to self._events.
        #
        # source -- file name or file object; a name is opened in "rb" mode.
        #           Nothing in this method closes that file.
        # events -- sequence of event names; None means ["end"].  Names other
        #           than "start", "end", "start-ns" and "end-ns" match no
        #           branch below and are silently ignored.
        if not hasattr(source, "read"):
            source = open(source, "rb")
        self._file = source
        self._events = []
        self._index = 0
        self.root = self._root = None
        self._parser = XMLTreeBuilder()
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            # event/append/start/end are bound as default arguments so each
            # handler keeps the values current at definition time
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    # fall back to the builder's _start method
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    # report the URI ASCII-encoded when possible; otherwise
                    # pass it through unchanged
                    try:
                        uri = _encode(uri, "ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri)))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    # the prefix is not reported; the payload is always None
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
				917
				918	def next(self):
				919	while 1:
				920	try:
				921	item = self._events[self._index]
				922	except IndexError:
				923	if self._parser is None:
				924	self.root = self._root
				925	try:
				926	raise StopIteration
				927	except NameError:
				928	raise IndexError
				929	# load event buffer
				930	del self._events[:]
				931	self._index = 0
				932	data = self._file.read(16384)
				933	if data:
				934	self._parser.feed(data)
				935	else:
				936	self._root = self._parser.close()
				937	self._parser = None
				938	else:
				939	self._index = self._index + 1
				940	return item
				941
				942	try:
				943	iter
				944	def __iter__(self):
				945	return self
				946	except NameError:
				947	def __getitem__(self, index):
				948	return self.next()
				949
				950	##
				951	# Parses an XML document from a string constant. This function can
				952	# be used to embed "XML literals" in Python code.
				953	#
				954	# @param text A string containing XML data.
				955	# @return An Element instance.
				956	# @defreturn Element
				957
				958	def XML(text):
				959	parser = XMLTreeBuilder()
				960	parser.feed(text)
				961	return parser.close()
				962
				963	##
				964	# Parses an XML document from a string constant, and also returns
				965	# a dictionary which maps from element id:s to elements.
				966	#
				967	# @param text A string containing XML data.
				968	# @return A tuple containing an Element instance and a dictionary.
				969	# @defreturn (Element, dictionary)
				970
				971	def XMLID(text):
				972	parser = XMLTreeBuilder()
				973	parser.feed(text)
				974	tree = parser.close()
				975	ids = {}
				976	for elem in tree.getiterator():
				977	id = elem.get("id")
				978	if id:
				979	ids[id] = elem
				980	return tree, ids
				981
				982	##
				983	# Parses an XML document from a string constant. Same as {@link #XML}.
				984	#
				985	# @def fromstring(text)
				986	# @param text A string containing XML data.
				987	# @return An Element instance.
				988	# @defreturn Element
				989
				990	fromstring = XML
				991
				992	##
				993	# Generates a string representation of an XML element, including all
				994	# subelements.
				995	#
				996	# @param element An Element instance.
				997	# @return An encoded string containing the XML data.
				998	# @defreturn string
				999
				1000	def tostring(element, encoding=None):
				1001	class dummy:
				1002	pass
				1003	data = []
				1004	file = dummy()
				1005	file.write = data.append
				1006	ElementTree(element).write(file, encoding)
				1007	return string.join(data, "")
				1008
				1009	##
				1010	# Generic element structure builder. This builder converts a sequence
				1011	# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
				1012	# #TreeBuilder.end} method calls to a well-formed element structure.
				1013	# <p>
				1014	# You can use this class to build an element structure using a custom XML
				1015	# parser, or a parser for some other XML-like format.
				1016	#
				1017	# @param element_factory Optional element factory. This factory
				1018	# is called to create new Element instances, as necessary.
				1019
				1020	class TreeBuilder:
				1021
				1022	def __init__(self, element_factory=None):
				1023	self._data = [] # data collector
				1024	self._elem = [] # element stack
				1025	self._last = None # last element
				1026	self._tail = None # true if we're after an end tag
				1027	if element_factory is None:
				1028	element_factory = _ElementInterface
				1029	self._factory = element_factory
				1030
				1031	##
				1032	# Flushes the parser buffers, and returns the toplevel documen
				1033	# element.
				1034	#
				1035	# @return An Element instance.
				1036	# @defreturn Element
				1037
				1038	def close(self):
				1039	assert len(self._elem) == 0, "missing end tags"
				1040	assert self._last != None, "missing toplevel element"
				1041	return self._last
				1042
				1043	def _flush(self):
				1044	if self._data:
				1045	if self._last is not None:
				1046	text = string.join(self._data, "")
				1047	if self._tail:
				1048	assert self._last.tail is None, "internal error (tail)"
				1049	self._last.tail = text
				1050	else:
				1051	assert self._last.text is None, "internal error (text)"
				1052	self._last.text = text
				1053	self._data = []
				1054
				1055	##
				1056	# Adds text to the current element.
				1057	#
				1058	# @param data A string. This should be either an 8-bit string
				1059	# containing ASCII text, or a Unicode string.
				1060
				1061	def data(self, data):
				1062	self._data.append(data)
				1063
				1064	##
				1065	# Opens a new element.
				1066	#
				1067	# @param tag The element name.
				1068	# @param attrib A dictionary containing element attributes.
				1069	# @return The opened element.
				1070	# @defreturn Element
				1071
				1072	def start(self, tag, attrs):
				1073	self._flush()
				1074	self._last = elem = self._factory(tag, attrs)
				1075	if self._elem:
				1076	self._elem[-1].append(elem)
				1077	self._elem.append(elem)
				1078	self._tail = 0
				1079	return elem
				1080
				1081	##
				1082	# Closes the current element.
				1083	#
				1084	# @param tag The element name.
				1085	# @return The closed element.
				1086	# @defreturn Element
				1087
				1088	def end(self, tag):
				1089	self._flush()
				1090	self._last = self._elem.pop()
				1091	assert self._last.tag == tag,\
				1092	"end tag mismatch (expected %s, got %s)" % (
				1093	self._last.tag, tag)
				1094	self._tail = 1
				1095	return self._last
				1096
				1097	##
				1098	# Element structure builder for XML source data, based on the
				1099	# <b>expat</b> parser.
				1100	#
				1101	# @keyparam target Target object. If omitted, the builder uses an
				1102	# instance of the standard {@link #TreeBuilder} class.
				1103	# @keyparam html Predefine HTML entities. This flag is not supported
				1104	# by the current implementation.
				1105	# @see #ElementTree
				1106	# @see #TreeBuilder
				1107
				1108	class XMLTreeBuilder:
				1109
				1110	def __init__(self, html=0, target=None):
				1111	try:
				1112	from xml.parsers import expat
				1113	except ImportError:
				1114	raise ImportError(
				1115	"No module named expat; use SimpleXMLTreeBuilder instead"
				1116	)
				1117	self._parser = parser = expat.ParserCreate(None, "}")
				1118	if target is None:
				1119	target = TreeBuilder()
				1120	self._target = target
				1121	self._names = {} # name memo cache
				1122	# callbacks
				1123	parser.DefaultHandlerExpand = self._default
				1124	parser.StartElementHandler = self._start
				1125	parser.EndElementHandler = self._end
				1126	parser.CharacterDataHandler = self._data
				1127	# let expat do the buffering, if supported
				1128	try:
				1129	self._parser.buffer_text = 1
				1130	except AttributeError:
				1131	pass
				1132	# use new-style attribute handling, if supported
				1133	try:
				1134	self._parser.ordered_attributes = 1
				1135	self._parser.specified_attributes = 1
				1136	parser.StartElementHandler = self._start_list
				1137	except AttributeError:
				1138	pass
				1139	encoding = None
				1140	if not parser.returns_unicode:
				1141	encoding = "utf-8"
				1142	# target.xml(encoding, None)
				1143	self._doctype = None
				1144	self.entity = {}
				1145
				1146	def _fixtext(self, text):
				1147	# convert text string to ascii, if possible
				1148	try:
				1149	return _encode(text, "ascii")
				1150	except UnicodeError:
				1151	return text
				1152
				1153	def _fixname(self, key):
				1154	# expand qname, and convert name string to ascii, if possible
				1155	try:
				1156	name = self._names[key]
				1157	except KeyError:
				1158	name = key
				1159	if "}" in name:
				1160	name = "{" + name
				1161	self._names[key] = name = self._fixtext(name)
				1162	return name
				1163
				1164	def _start(self, tag, attrib_in):
				1165	fixname = self._fixname
				1166	tag = fixname(tag)
				1167	attrib = {}
				1168	for key, value in attrib_in.items():
				1169	attrib[fixname(key)] = self._fixtext(value)
				1170	return self._target.start(tag, attrib)
				1171
				1172	def _start_list(self, tag, attrib_in):
				1173	fixname = self._fixname
				1174	tag = fixname(tag)
				1175	attrib = {}
				1176	if attrib_in:
				1177	for i in range(0, len(attrib_in), 2):
				1178	attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
				1179	return self._target.start(tag, attrib)
				1180
				1181	def _data(self, text):
				1182	return self._target.data(self._fixtext(text))
				1183
				1184	def _end(self, tag):
				1185	return self._target.end(self._fixname(tag))
				1186
				1187	def _default(self, text):
				1188	prefix = text[:1]
				1189	if prefix == "&":
				1190	# deal with undefined entities
				1191	try:
				1192	self._target.data(self.entity[text[1:-1]])
				1193	except KeyError:
				1194	from xml.parsers import expat
				1195	raise expat.error(
				1196	"undefined entity %s: line %d, column %d" %
				1197	(text, self._parser.ErrorLineNumber,
				1198	self._parser.ErrorColumnNumber)
				1199	)
				1200	elif prefix == "<" and text[:9] == "<!DOCTYPE":
				1201	self._doctype = [] # inside a doctype declaration
				1202	elif self._doctype is not None:
				1203	# parse doctype contents
				1204	if prefix == ">":
				1205	self._doctype = None
				1206	return
				1207	text = string.strip(text)
				1208	if not text:
				1209	return
				1210	self._doctype.append(text)
				1211	n = len(self._doctype)
				1212	if n > 2:
				1213	type = self._doctype[1]
				1214	if type == "PUBLIC" and n == 4:
				1215	name, type, pubid, system = self._doctype
				1216	elif type == "SYSTEM" and n == 3:
				1217	name, type, system = self._doctype
				1218	pubid = None
				1219	else:
				1220	return
				1221	if pubid:
				1222	pubid = pubid[1:-1]
				1223	self.doctype(name, pubid, system[1:-1])
				1224	self._doctype = None
				1225
				1226	##
				1227	# Handles a doctype declaration.
				1228	#
				1229	# @param name Doctype name.
				1230	# @param pubid Public identifier.
				1231	# @param system System identifier.
				1232
				1233	def doctype(self, name, pubid, system):
				1234	pass
				1235
				1236	##
				1237	# Feeds data to the parser.
				1238	#
				1239	# @param data Encoded data.
				1240
				1241	def feed(self, data):
				1242	self._parser.Parse(data, 0)
				1243
				1244	##
				1245	# Finishes feeding data to the parser.
				1246	#
				1247	# @return An element structure.
				1248	# @defreturn Element
				1249
				1250	def close(self):
				1251	self._parser.Parse("", 1) # end of data
				1252	tree = self._target.close()
				1253	del self._target, self._parser # get rid of circular references
				1254	return tree