blob: 6f20ee300dc350c7fd279cb4bcf9a06c2d2220c8 [file] [log] [blame]
Thomas Wouters89f507f2006-12-13 04:49:30 +00001\section{\module{xml.etree.ElementTree} --- The ElementTree XML API}
2\declaremodule{standard}{xml.etree.ElementTree}
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003\moduleauthor{Fredrik Lundh}{fredrik@pythonware.com}
Thomas Wouters89f507f2006-12-13 04:49:30 +00004\modulesynopsis{Implementation of the ElementTree API.}
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006\versionadded{2.5}
7
Thomas Wouters0e3f5912006-08-11 14:57:12 +00008The Element type is a flexible container object, designed to store
9hierarchical data structures in memory. The type can be described as a
10cross between a list and a dictionary.
11
12Each element has a number of properties associated with it:
Thomas Wouters89f507f2006-12-13 04:49:30 +000013
Thomas Wouters0e3f5912006-08-11 14:57:12 +000014\begin{itemize}
Thomas Wouters89f507f2006-12-13 04:49:30 +000015 \item a tag which is a string identifying what kind of data
16 this element represents (the element type, in other words).
17 \item a number of attributes, stored in a Python dictionary.
18 \item a text string.
19 \item an optional tail string.
20 \item a number of child elements, stored in a Python sequence.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000021\end{itemize}
22
23To create an element instance, use the Element or SubElement factory
24functions.
25
Thomas Wouters89f507f2006-12-13 04:49:30 +000026The \class{ElementTree} class can be used to wrap an element
Thomas Wouters0e3f5912006-08-11 14:57:12 +000027structure, and convert it from and to XML.
28
Thomas Wouters89f507f2006-12-13 04:49:30 +000029A C implementation of this API is available as
30\module{xml.etree.cElementTree}.
31
Thomas Wouters0e3f5912006-08-11 14:57:12 +000032
33\subsection{Functions\label{elementtree-functions}}
34
35\begin{funcdesc}{Comment}{\optional{text}}
36Comment element factory. This factory function creates a special
37element that will be serialized as an XML comment.
38The comment string can be either an 8-bit ASCII string or a Unicode
39string.
40\var{text} is a string containing the comment string.
Thomas Wouters27d517b2007-02-25 20:39:11 +000041Returns an element instance representing a comment.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042\end{funcdesc}
43
44\begin{funcdesc}{dump}{elem}
45Writes an element tree or element structure to sys.stdout. This
46function should be used for debugging only.
47
48The exact output format is implementation dependent. In this
49version, it's written as an ordinary XML file.
50
51\var{elem} is an element tree or an individual element.
52\end{funcdesc}
53
54\begin{funcdesc}{Element}{tag\optional{, attrib}\optional{, **extra}}
55Element factory. This function returns an object implementing the
56standard Element interface. The exact class or type of that object
57is implementation dependent, but it will always be compatible with
58the {\_}ElementInterface class in this module.
59
60The element name, attribute names, and attribute values can be
61either 8-bit ASCII strings or Unicode strings.
62\var{tag} is the element name.
63\var{attrib} is an optional dictionary, containing element attributes.
64\var{extra} contains additional attributes, given as keyword arguments.
Thomas Wouters27d517b2007-02-25 20:39:11 +000065Returns an element instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000066\end{funcdesc}
67
68\begin{funcdesc}{fromstring}{text}
69Parses an XML section from a string constant. Same as XML.
70\var{text} is a string containing XML data.
Thomas Wouters27d517b2007-02-25 20:39:11 +000071Returns an Element instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000072\end{funcdesc}
73
74\begin{funcdesc}{iselement}{element}
75Checks if an object appears to be a valid element object.
76\var{element} is an element instance.
Thomas Wouters27d517b2007-02-25 20:39:11 +000077Returns a true value if this is an element object.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000078\end{funcdesc}
79
80\begin{funcdesc}{iterparse}{source\optional{, events}}
81Parses an XML section into an element tree incrementally, and reports
82what's going on to the user.
83\var{source} is a filename or file object containing XML data.
84\var{events} is a list of events to report back. If omitted, only ``end''
85events are reported.
Thomas Wouters27d517b2007-02-25 20:39:11 +000086Returns an iterator providing \code{(\var{event}, \var{elem})} pairs.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000087\end{funcdesc}
88
89\begin{funcdesc}{parse}{source\optional{, parser}}
90Parses an XML section into an element tree.
91\var{source} is a filename or file object containing XML data.
92\var{parser} is an optional parser instance. If not given, the
93standard XMLTreeBuilder parser is used.
Thomas Wouters27d517b2007-02-25 20:39:11 +000094Returns an ElementTree instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +000095\end{funcdesc}
96
97\begin{funcdesc}{ProcessingInstruction}{target\optional{, text}}
98PI element factory. This factory function creates a special element
99that will be serialized as an XML processing instruction.
100\var{target} is a string containing the PI target.
101\var{text} is a string containing the PI contents, if given.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000102Returns an element instance, representing a processing instruction.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000103\end{funcdesc}
104
Thomas Wouters27d517b2007-02-25 20:39:11 +0000105\begin{funcdesc}{SubElement}{parent, tag\optional{,
106 attrib\optional{, **extra}}}
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000107Subelement factory. This function creates an element instance, and
108appends it to an existing element.
109
110The element name, attribute names, and attribute values can be
111either 8-bit ASCII strings or Unicode strings.
112\var{parent} is the parent element.
113\var{tag} is the subelement name.
114\var{attrib} is an optional dictionary, containing element attributes.
115\var{extra} contains additional attributes, given as keyword arguments.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000116Returns an element instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000117\end{funcdesc}
118
119\begin{funcdesc}{tostring}{element\optional{, encoding}}
120Generates a string representation of an XML element, including all
121subelements.
122\var{element} is an Element instance.
123\var{encoding} is the output encoding (default is US-ASCII).
Thomas Wouters27d517b2007-02-25 20:39:11 +0000124Returns an encoded string containing the XML data.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000125\end{funcdesc}
126
127\begin{funcdesc}{XML}{text}
128Parses an XML section from a string constant. This function can
129be used to embed ``XML literals'' in Python code.
130\var{text} is a string containing XML data.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000131Returns an Element instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000132\end{funcdesc}
133
134\begin{funcdesc}{XMLID}{text}
135Parses an XML section from a string constant, and also returns
136a dictionary which maps from element IDs to elements.
137\var{text} is a string containing XML data.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000138Returns a tuple containing an Element instance and a dictionary.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000139\end{funcdesc}
140
141
Thomas Wouters27d517b2007-02-25 20:39:11 +0000142\subsection{The Element Interface\label{elementtree-element-interface}}
143
144Element objects returned by Element or SubElement have the
145following methods and attributes.
146
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147\begin{memberdesc}[Element]{tag}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000148A string identifying what kind of data this element represents
149(the element type, in other words).
150\end{memberdesc}
151
Guido van Rossumd8faa362007-04-27 19:54:29 +0000152\begin{memberdesc}[Element]{text}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000153The \var{text} attribute can be used to hold additional data
154associated with the element.
155As the name implies this attribute is usually a string but may be any
156application-specific object.
157If the element is created from an XML file the attribute will contain
158any text found between the element tags.
159\end{memberdesc}
160
Guido van Rossumd8faa362007-04-27 19:54:29 +0000161\begin{memberdesc}[Element]{tail}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000162The \var{tail} attribute can be used to hold additional data
163associated with the element.
164This attribute is usually a string but may be any application-specific object.
165If the element is created from an XML file the attribute will contain
166any text found after the element's end tag and before the next tag.
167\end{memberdesc}
168
Guido van Rossumd8faa362007-04-27 19:54:29 +0000169\begin{memberdesc}[Element]{attrib}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000170A dictionary containing the element's attributes.
171Note that while the \var{attrib} value is always a real mutable Python
172dictionary, an ElementTree implementation may choose to use another
173internal representation, and create the dictionary only if someone
174asks for it. To take advantage of such implementations, use the
175dictionary methods below whenever possible.
176\end{memberdesc}
177
178The following dictionary-like methods work on the element attributes.
179
Guido van Rossumd8faa362007-04-27 19:54:29 +0000180\begin{methoddesc}[Element]{clear}{}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000181Resets an element. This function removes all subelements, clears
182all attributes, and sets the text and tail attributes to None.
183\end{methoddesc}
184
Guido van Rossumd8faa362007-04-27 19:54:29 +0000185\begin{methoddesc}[Element]{get}{key\optional{, default=None}}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000186Gets the element attribute named \var{key}.
187
188Returns the attribute value, or \var{default} if the
189attribute was not found.
190\end{methoddesc}
191
Guido van Rossumd8faa362007-04-27 19:54:29 +0000192\begin{methoddesc}[Element]{items}{}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000193Returns the element attributes as a sequence of (name, value) pairs.
194The attributes are returned in an arbitrary order.
195\end{methoddesc}
196
Guido van Rossumd8faa362007-04-27 19:54:29 +0000197\begin{methoddesc}[Element]{keys}{}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000198Returns the element's attribute names as a list.
199The names are returned in an arbitrary order.
200\end{methoddesc}
201
Guido van Rossumd8faa362007-04-27 19:54:29 +0000202\begin{methoddesc}[Element]{set}{key, value}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000203Set the attribute \var{key} on the element to \var{value}.
204\end{methoddesc}
205
206The following methods work on the element's children (subelements).
207
Guido van Rossumd8faa362007-04-27 19:54:29 +0000208\begin{methoddesc}[Element]{append}{subelement}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000209Adds the element \var{subelement} to the end of this element's internal list
210of subelements.
211\end{methoddesc}
212
Guido van Rossumd8faa362007-04-27 19:54:29 +0000213\begin{methoddesc}[Element]{find}{match}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000214Finds the first subelement matching \var{match}.
215\var{match} may be a tag name or path.
216Returns an element instance or \code{None}.
217\end{methoddesc}
218
Guido van Rossumd8faa362007-04-27 19:54:29 +0000219\begin{methoddesc}[Element]{findall}{match}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000220Finds all subelements matching \var{match}.
221\var{match} may be a tag name or path.
222Returns an iterable yielding all matching elements in document order.
223\end{methoddesc}
224
Guido van Rossumd8faa362007-04-27 19:54:29 +0000225\begin{methoddesc}[Element]{findtext}{condition\optional{, default=None}}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000226Finds text for the first subelement matching \var{condition}.
227\var{condition} may be a tag name or path.
228Returns the text content of the first matching element, or
229\var{default} if no element was found. Note that if the
230matching element has no text content an empty string is returned.
231\end{methoddesc}
232
Guido van Rossumd8faa362007-04-27 19:54:29 +0000233\begin{methoddesc}[Element]{getchildren}{}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000234Returns all subelements. The elements are returned in document order.
235\end{methoddesc}
236
Guido van Rossumd8faa362007-04-27 19:54:29 +0000237\begin{methoddesc}[Element]{getiterator}{\optional{tag=None}}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000238Creates a tree iterator with the current element as the root.
239The iterator iterates over this element and all elements below it
240that match the given tag. If tag
241is \code{None} or \code{'*'} then all elements are iterated over.
242Returns an iterable that provides element objects in document (depth first)
243order.
244\end{methoddesc}
245
Guido van Rossumd8faa362007-04-27 19:54:29 +0000246\begin{methoddesc}[Element]{insert}{index, element}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000247Inserts a subelement at the given position in this element.
248\end{methoddesc}
249
Guido van Rossumd8faa362007-04-27 19:54:29 +0000250\begin{methoddesc}[Element]{makeelement}{tag, attrib}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000251Creates a new element object of the same type as this element.
252Do not call this method; use the SubElement factory function instead.
253\end{methoddesc}
254
Guido van Rossumd8faa362007-04-27 19:54:29 +0000255\begin{methoddesc}[Element]{remove}{subelement}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000256Removes \var{subelement} from the element.
257Unlike the findXXX methods this method compares elements based on
258the instance identity, not on tag value or contents.
259\end{methoddesc}
260
261Element objects also support the following sequence type methods for
262working with subelements: \method{__delitem__()},
263\method{__getitem__()}, \method{__setitem__()}, \method{__len__()}.
264
265Caution: Because Element objects do not define a
266\method{__nonzero__()} method, elements with no subelements will test
267as \code{False}.
268
269\begin{verbatim}
270element = root.find('foo')
271
272if not element: # careful!
273 print "element not found, or element has no subelements"
274
275if element is None:
276 print "element not found"
277\end{verbatim}
278
279
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000280\subsection{ElementTree Objects\label{elementtree-elementtree-objects}}
281
282\begin{classdesc}{ElementTree}{\optional{element,} \optional{file}}
283ElementTree wrapper class. This class represents an entire element
284hierarchy, and adds some extra support for serialization to and from
285standard XML.
286
287\var{element} is the root element.
288The tree is initialized with the contents of the XML \var{file} if given.
289\end{classdesc}
290
291\begin{methoddesc}{_setroot}{element}
292Replaces the root element for this tree. This discards the
293current contents of the tree, and replaces it with the given
294element. Use with care.
295\var{element} is an element instance.
296\end{methoddesc}
297
298\begin{methoddesc}{find}{path}
299Finds the first toplevel element with the given tag.
300Same as getroot().find(path).
301\var{path} is the element to look for.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000302Returns the first matching element, or \code{None} if no element was found.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000303\end{methoddesc}
304
305\begin{methoddesc}{findall}{path}
306Finds all toplevel elements with the given tag.
307Same as getroot().findall(path).
308\var{path} is the element to look for.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000309Returns a list or iterator containing all matching elements,
310in document order.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000311\end{methoddesc}
312
313\begin{methoddesc}{findtext}{path\optional{, default}}
314Finds the element text for the first toplevel element with the given
315tag. Same as getroot().findtext(path).
316\var{path} is the toplevel element to look for.
317\var{default} is the value to return if the element was not found.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000318Returns the text content of the first matching element, or the
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000319default value if no element was found. Note that if the element
320is found, but has no text content, this method returns an
321empty string.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000322\end{methoddesc}
323
324\begin{methoddesc}{getiterator}{\optional{tag}}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000325Creates and returns a tree iterator for the root element. The iterator loops
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000326over all elements in this tree, in document order.
327\var{tag} is the tag to look for (default is to return all elements).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000328\end{methoddesc}
329
330\begin{methoddesc}{getroot}{}
Thomas Wouters27d517b2007-02-25 20:39:11 +0000331Returns the root element for this tree.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000332\end{methoddesc}
333
334\begin{methoddesc}{parse}{source\optional{, parser}}
335Loads an external XML section into this element tree.
336\var{source} is a file name or file object.
337\var{parser} is an optional parser instance. If not given, the
338standard XMLTreeBuilder parser is used.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000339Returns the section root element.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000340\end{methoddesc}
341
342\begin{methoddesc}{write}{file\optional{, encoding}}
343Writes the element tree to a file, as XML.
344\var{file} is a file name, or a file object opened for writing.
345\var{encoding} is the output encoding (default is US-ASCII).
346\end{methoddesc}
347
348
349\subsection{QName Objects\label{elementtree-qname-objects}}
350
351\begin{classdesc}{QName}{text_or_uri\optional{, tag}}
352QName wrapper. This can be used to wrap a QName attribute value, in
353order to get proper namespace handling on output.
354\var{text_or_uri} is a string containing the QName value,
355in the form {\{}uri{\}}local, or, if the tag argument is given,
356the URI part of a QName.
357If \var{tag} is given, the first argument is interpreted as
358a URI, and this argument is interpreted as a local name.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000359\class{QName} instances are opaque.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000360\end{classdesc}
361
362
363\subsection{TreeBuilder Objects\label{elementtree-treebuilder-objects}}
364
365\begin{classdesc}{TreeBuilder}{\optional{element_factory}}
366Generic element structure builder. This builder converts a sequence
367of start, data, and end method calls to a well-formed element structure.
368You can use this class to build an element structure using a custom XML
369parser, or a parser for some other XML-like format.
370The \var{element_factory} is called to create new Element instances when
371given.
372\end{classdesc}
373
374\begin{methoddesc}{close}{}
375Flushes the parser buffers, and returns the toplevel document
376element.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000377Returns an Element instance.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000378\end{methoddesc}
379
380\begin{methoddesc}{data}{data}
381Adds text to the current element.
382\var{data} is a string. This should be either an 8-bit string
383containing ASCII text, or a Unicode string.
384\end{methoddesc}
385
386\begin{methoddesc}{end}{tag}
387Closes the current element.
388\var{tag} is the element name.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000389Returns the closed element.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000390\end{methoddesc}
391
392\begin{methoddesc}{start}{tag, attrs}
393Opens a new element.
394\var{tag} is the element name.
395\var{attrs} is a dictionary containing element attributes.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000396Returns the opened element.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000397\end{methoddesc}
398
399
400\subsection{XMLTreeBuilder Objects\label{elementtree-xmltreebuilder-objects}}
401
402\begin{classdesc}{XMLTreeBuilder}{\optional{html,} \optional{target}}
403Element structure builder for XML source data, based on the
404expat parser.
405\var{html} are predefined HTML entities. This flag is not supported
406by the current implementation.
407\var{target} is the target object. If omitted, the builder uses an
408instance of the standard TreeBuilder class.
409\end{classdesc}
410
411\begin{methoddesc}{close}{}
412Finishes feeding data to the parser.
Thomas Wouters27d517b2007-02-25 20:39:11 +0000413Returns an element structure.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000414\end{methoddesc}
415
416\begin{methoddesc}{doctype}{name, pubid, system}
417Handles a doctype declaration.
418\var{name} is the doctype name.
419\var{pubid} is the public identifier.
420\var{system} is the system identifier.
421\end{methoddesc}
422
423\begin{methoddesc}{feed}{data}
424Feeds data to the parser.
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000425\var{data} is encoded data.
426\end{methoddesc}