blob: 40c22917c07440c464aa76db5d9d54a9c3ec4799 [file] [log] [blame]
# xml.etree test. This file contains enough tests to make sure that
# all included components work as they should.
# Large parts are extracted from the upstream test suite.

# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
# order to ensure consistency between the C implementation and the
# Python implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
# Don't re-import the "xml.etree.ElementTree" module in a docstring,
# unless the test is specific to the Python implementation.
13
Christian Heimes05e8be12008-02-23 18:30:17 +000014import sys
Georg Brandl1f7fffb2010-10-15 15:57:45 +000015import html
Victor Stinner6c6f8512010-08-07 10:09:35 +000016import unittest
Armin Rigo9ed73062005-12-14 18:10:45 +000017
Benjamin Petersonee8712c2008-05-20 21:35:26 +000018from test import support
Florent Xiclunaf15351d2010-03-13 23:24:31 +000019from test.support import findfile
Armin Rigo9ed73062005-12-14 18:10:45 +000020
Florent Xiclunaf15351d2010-03-13 23:24:31 +000021from xml.etree import ElementTree as ET
22
# Path of the plain XML fixture, located through test.support.findfile.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
# Raise SkipTest while the module is being imported when the fixture
# path cannot be encoded as UTF-8.
try:
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
# Path of the second fixture; the doctests below show its tags carrying
# a "{namespace}" prefix.
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# Small document with attributes and one nested section; parsed by
# several doctests below.
SAMPLE_XML = """\
<body>
 <tag class='a'>text</tag>
 <tag class='b' />
 <section>
 <tag class='b' id='inner'>subtext</tag>
 </section>
</body>
"""

# Replacement <section> subtree; find() swaps it in as elem[2].
SAMPLE_SECTION = """\
<section>
 <tag class='b' id='inner'>subtext</tag>
 <nexttag />
 <nextsection>
 <tag />
 </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but with a default namespace declared on <body>.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
 <tag>text</tag>
 <tag />
 <section>
 <tag>subtext</tag>
 </section>
</body>
"""
59
Florent Xiclunaf15351d2010-03-13 23:24:31 +000060
# Doctest-only function: the docstring below is the test body.
def sanity():
    """
    Import sanity.

    Each of the three xml.etree submodules must import without error.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
69
def check_method(method):
    """Print a diagnostic line when *method* is not callable.

    Nothing is printed (and None is returned) for callable objects, so a
    doctest calling this helper expects no output on success.

    The callable() builtin is used instead of probing for a '__call__'
    attribute: an attribute that exists only on the instance does not
    make the object callable, and hasattr() would wrongly accept it.
    """
    if not callable(method):
        print(method, "not callable")
Armin Rigo9ed73062005-12-14 18:10:45 +000073
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ET.ElementTree.write() into an in-memory file.

    A text buffer (io.StringIO) is used when *encoding* is 'unicode',
    a bytes buffer (io.BytesIO) for every other encoding.  Extra keyword
    *options* are forwarded to write() unchanged.

    Returns the buffer contents when *to_string* is true; otherwise the
    buffer object itself, rewound to position 0.
    """
    import io
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
Armin Rigo9ed73062005-12-14 18:10:45 +000087
def summarize(elem):
    """Return a printable label for *elem*.

    Elements whose tag equals ET.Comment are reported as "<Comment>";
    every other element is reported by its tag.
    """
    tag = elem.tag
    return "<Comment>" if tag == ET.Comment else tag
92
def summarize_list(seq):
    """Return a list holding summarize(item) for each item of *seq*, in order."""
    return list(map(summarize, seq))
95
def normalize_crlf(tree):
    """Rewrite CR LF pairs as a single LF in the text and tail of each element.

    Every element yielded by tree.iter() is updated in place; text or
    tail values that are None or empty are left untouched.
    """
    for node in tree.iter():
        for field in ("text", "tail"):
            value = getattr(node, field)
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
102
def normalize_exception(func, *args, **kwargs):
    """Call func(*args, **kwargs) and print a module-free traceback on failure.

    When the call raises an Exception, a traceback header line followed
    by "<ClassName>: <message>" is printed; the exception's __module__
    is deliberately left out.  Nothing is printed, and the call's return
    value is discarded, when no exception is raised.
    """
    try:
        func(*args, **kwargs)
    except Exception as exc:
        header = "Traceback (most recent call last):"
        summary = "{}: {}".format(type(exc).__name__, exc)
        print(header)
        print(summary)
110
def check_string(string):
    """Exercise the string-like operations expected of *string*.

    len(), iteration, concatenation with a str and slicing are each
    performed once; any of them may raise if unsupported.  A message is
    printed for every iterated item whose length is not 1.
    """
    len(string)
    for item in string:
        if len(item) == 1:
            continue
        print("expected one-character string, got %r" % item)
    # The results are irrelevant; only the ability to perform the
    # operations is being checked.
    string + ""
    string + " "
    string[:0]
119
def check_mapping(mapping):
    """Exercise the dict-like operations expected of *mapping*.

    len(), keys(), items() and a lookup of every key are performed.
    Then "key" is set to "value" (the mapping is modified in place) and
    read back; a message is printed if the stored value differs.
    """
    len(mapping)
    names = mapping.keys()
    mapping.items()
    for name in names:
        mapping[name]
    mapping["key"] = "value"
    if not mapping["key"] != "value":
        return
    print("expected value string, got %r" % mapping["key"])
129
def check_element(element):
    """Check that *element* and all of its descendants look like elements.

    A message is printed when ET.iselement() rejects the object or when
    one of the tag/attrib/text/tail attributes is missing.  The tag and
    any non-None text and tail go through check_string(), the attrib
    mapping through check_mapping() (which adds a "key" entry to it),
    and every child is then checked recursively.
    """
    if not ET.iselement(element):
        print("not an element")
    required = (
        ("tag", "no tag member"),
        ("attrib", "no attrib member"),
        ("text", "no text member"),
        ("tail", "no tail member"),
    )
    for attr_name, complaint in required:
        if not hasattr(element, attr_name):
            print(complaint)

    check_string(element.tag)
    check_mapping(element.attrib)
    text = element.text
    if text is not None:
        check_string(text)
    tail = element.tail
    if tail is not None:
        check_string(tail)
    for child in element:
        check_element(child)
150
151# --------------------------------------------------------------------
152# element tree tests
Armin Rigo9ed73062005-12-14 18:10:45 +0000153
# Doctest-only function: the docstring below is the test body.  It uses
# the module-level ET symbol plus the check_element()/check_method()
# helpers, which print only when something is wrong.
def interface():
    """
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element) # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").__next__)
    >>> check_method(element.iterfind("tag").__next__)
    >>> check_method(element.iterfind("*").__next__)
    >>> check_method(tree.iter("tag").__next__)
    >>> check_method(tree.iterfind("tag").__next__)
    >>> check_method(tree.iterfind("*").__next__)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
204
# Doctest-only function: the docstring below is the test body.
def simpleops():
    """
    Basic method sanity checks.

    Covers append/remove/insert/extend on an element, followed by slice
    assignment, slice reads and slice deletion of children.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
260
# Doctest-only function: the docstring below is the test body.
def cdata():
    """
    Test CDATA handling (etc).

    Plain text, numeric character references and a CDATA section are
    all expected to serialize to the same output.

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
272
# Only with Python implementation
# (the doctest imports xml.etree.ElementTree itself instead of using the
# module-level ET symbol, swaps in _SimpleElementPath and restores the
# previous ElementPath at the end).
def simplefind():
    """
    Test find methods using the elementpath fallback.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
308
# Doctest-only function: the docstring below is the test body.  Partway
# through, the third child of the parsed SAMPLE_XML root is replaced by
# the SAMPLE_SECTION subtree, which changes the later result lists.
def find():
    """
    Test find methods (including xpath syntax).

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
430
# Doctest-only function: the docstring below is the test body.
def file_init():
    """
    Test building an ElementTree through its file= argument, first from
    an in-memory bytes file and then from the SIMPLE_XMLFILE path.

    >>> import io

    >>> stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
448
# Doctest-only function: the docstring below is the test body.
def bad_find():
    """
    Check bad or unsupported path expressions.

    An absolute path passed to Element.findall() is expected to raise
    SyntaxError.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
458
# Doctest-only function: the docstring below is the test body.  It reads
# the private ET.ElementPath._cache mapping directly.
def path_cache():
    """
    Check that the path cache behaves sanely.

    Repeating the same lookups must not grow the cache, new paths must
    grow it, and after 600 distinct paths it must hold fewer than 500
    entries.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
476
# Doctest-only function: the docstring below is the test body.  The
# function shares its name with the stdlib "copy" module, which the
# doctest imports inside its own namespace.
def copy():
    """
    Test copy handling (etc).

    After renaming a child of the original, the shallow copy is expected
    to show the change and the deep copy is not.

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
494
# Doctest-only function: the docstring below is the test body.
def attrib():
    """
    Test attribute handling.

    Covers get()/set(), attributes given as keywords, as a dict, or as
    both, and checks that the element does not alias the caller's dict.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    b'<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    b'<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
552
# Doctest-only function: the docstring below is the test body.
def makeelement():
    """
    Test makeelement handling.

    Also exercises clear(), extend() and whole-slice assignment from a
    list and from a tuple.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print("attrib aliasing")
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
583
# Doctest-only function: the docstring below is the test body.
# NOTE(review): the expected-output lines are whitespace-sensitive;
# confirm their leading indentation against the fixture files.
def parsefile():
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout, encoding='unicode')
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout, encoding='unicode')
    <ns0:root xmlns:ns0="namespace">
    <ns0:element key="value">text</ns0:element>
    <ns0:element>text</ns0:element>tail
    <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print(serialize(parser.close()))
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print(serialize(parser.close()))
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print(serialize(parser.close()))
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    """
638
# Doctest-only function: the docstring below is the test body.
def parseliteral():
    """
    Test parsing from string literals (XML, fromstring, fromstringlist,
    XMLID) and converting back with tostring/tostringlist.

    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> ET.tostring(element)
    b'<html><body>text</body></html>'
    >>> b"".join(ET.tostringlist(element))
    b'<html><body>text</body></html>'
    >>> ET.tostring(element, "ascii")
    b"<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
664
# Doctest-only function: the docstring below is the test body.  Inside
# the doctest the name "iterparse" is rebound to ET.iterparse.
def iterparse():
    """
    Test iterparse interface.

    Covers the default event set, empty and explicit event tuples,
    namespace events, an unknown event name, non-ASCII namespace
    declarations and trailing junk after the document element.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print(action, elem.tag)
    end element
    >>> for action, elem in context:
    ...     print(action, elem.tag)
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...     print(action, elem.tag)
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print(action, elem.tag)

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...     print(action, elem.tag)

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print(action, elem.tag)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...     if action in ("start", "end"):
    ...         print(action, elem.tag)
    ...     else:
    ...         print(action, elem)
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import io

    >>> source = io.BytesIO(
    ...     b"<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     b"<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     b" xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print(action, elem)
    start-ns ('', 'http://\\xe9ffbot.org/ns')
    start-ns ('cl\\xe9', 'http://effbot.org/ns')

    >>> source = io.StringIO("<document />junk")
    >>> try:
    ...     for action, elem in iterparse(source):
    ...         print(action, elem.tag)
    ... except ET.ParseError as v:
    ...     print(v)
    junk after document element: line 1, column 12
    """
759
# Doctest-only function: the docstring below is the test body.
def writefile():
    """
    Test serializing elements with text and children, including the
    case where the root tag is None and comment / processing-instruction
    children are present.

    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem) # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
781
# Doctest-only function: the docstring below is the test body.  The
# second Builder class adds pi() and comment() callbacks to the first.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print("start", tag)
    ...     def end(self, tag):
    ...         print("end", tag)
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print("start", tag)
    ...     def end(self, tag):
    ...         print("end", tag)
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print("pi", target, repr(data))
    ...     def comment(self, data):
    ...         print("comment", repr(data))
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
835
# Doctest-only function: the docstring below is the test body.
def getchildren():
    """
    Test Element.getchildren()

    The second half checks that slicing, slice deletion and slice
    assignment agree with what getchildren() returns.

    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
882
# Doctest-only function: the docstring below is the test body.
def writestring():
    """
    Test ET.tostring() on elements parsed with ET.XML and ET.fromstring;
    both are expected to give the same bytes result.

    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    b'<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    b'<html><body>text</body></html>'
    """
892
def check_encoding(encoding):
    """
    Parse a minimal document whose XML declaration names *encoding*.

    Nothing is returned; the check is that the parser accepts the
    declaration without raising.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    """
    # Build the document first so the parse call reads on its own.
    document = "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    ET.XML(document)
903
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000904def encoding():
Antoine Pitrou99f69ee2010-02-09 17:25:47 +0000905 r"""
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000906 Test encoding issues.
Antoine Pitrou99f69ee2010-02-09 17:25:47 +0000907
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000908 >>> elem = ET.Element("tag")
909 >>> elem.text = "abc"
910 >>> serialize(elem)
911 '<tag>abc</tag>'
912 >>> serialize(elem, encoding="utf-8")
913 b'<tag>abc</tag>'
914 >>> serialize(elem, encoding="us-ascii")
915 b'<tag>abc</tag>'
916 >>> serialize(elem, encoding="iso-8859-1")
917 b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
918
919 >>> elem.text = "<&\"\'>"
920 >>> serialize(elem)
921 '<tag>&lt;&amp;"\'&gt;</tag>'
922 >>> serialize(elem, encoding="utf-8")
923 b'<tag>&lt;&amp;"\'&gt;</tag>'
924 >>> serialize(elem, encoding="us-ascii") # cdata characters
925 b'<tag>&lt;&amp;"\'&gt;</tag>'
926 >>> serialize(elem, encoding="iso-8859-1")
927 b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
928
929 >>> elem.attrib["key"] = "<&\"\'>"
930 >>> elem.text = None
931 >>> serialize(elem)
932 '<tag key="&lt;&amp;&quot;\'&gt;" />'
933 >>> serialize(elem, encoding="utf-8")
934 b'<tag key="&lt;&amp;&quot;\'&gt;" />'
935 >>> serialize(elem, encoding="us-ascii")
936 b'<tag key="&lt;&amp;&quot;\'&gt;" />'
937 >>> serialize(elem, encoding="iso-8859-1")
938 b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'
939
940 >>> elem.text = '\xe5\xf6\xf6<>'
941 >>> elem.attrib.clear()
942 >>> serialize(elem)
943 '<tag>\xe5\xf6\xf6&lt;&gt;</tag>'
944 >>> serialize(elem, encoding="utf-8")
945 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
946 >>> serialize(elem, encoding="us-ascii")
947 b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
948 >>> serialize(elem, encoding="iso-8859-1")
949 b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
950
951 >>> elem.attrib["key"] = '\xe5\xf6\xf6<>'
952 >>> elem.text = None
953 >>> serialize(elem)
954 '<tag key="\xe5\xf6\xf6&lt;&gt;" />'
955 >>> serialize(elem, encoding="utf-8")
956 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
957 >>> serialize(elem, encoding="us-ascii")
958 b'<tag key="&#229;&#246;&#246;&lt;&gt;" />'
959 >>> serialize(elem, encoding="iso-8859-1")
960 b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
961 """
962
963def methods():
964 r"""
965 Test serialization methods.
966
967 >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
968 >>> e.tail = "\n"
969 >>> serialize(e)
970 '<html><link /><script>1 &lt; 2</script></html>\n'
971 >>> serialize(e, method=None)
972 '<html><link /><script>1 &lt; 2</script></html>\n'
973 >>> serialize(e, method="xml")
974 '<html><link /><script>1 &lt; 2</script></html>\n'
975 >>> serialize(e, method="html")
976 '<html><link><script>1 < 2</script></html>\n'
977 >>> serialize(e, method="text")
978 '1 < 2\n'
979 """
980
981def iterators():
982 """
983 Test iterators.
984
985 >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
986 >>> summarize_list(e.iter())
987 ['html', 'body', 'i']
988 >>> summarize_list(e.find("body").iter())
989 ['body', 'i']
990 >>> summarize(next(e.iter()))
991 'html'
992 >>> "".join(e.itertext())
993 'this is a paragraph...'
994 >>> "".join(e.find("body").itertext())
995 'this is a paragraph.'
996 >>> next(e.itertext())
997 'this is a '
998
999 Method iterparse should return an iterator. See bug 6472.
1000
1001 >>> sourcefile = serialize(e, to_string=False)
1002 >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
1003 ('end', <Element 'i' at 0x...>)
1004
1005 >>> tree = ET.ElementTree(None)
1006 >>> tree.iter()
1007 Traceback (most recent call last):
1008 AttributeError: 'NoneType' object has no attribute 'iter'
1009 """
1010
1011ENTITY_XML = """\
1012<!DOCTYPE points [
1013<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
1014%user-entities;
1015]>
1016<document>&entity;</document>
1017"""
1018
1019def entity():
1020 """
1021 Test entity handling.
1022
1023 1) good entities
1024
1025 >>> e = ET.XML("<document title='&#x8230;'>test</document>")
1026 >>> serialize(e, encoding="us-ascii")
1027 b'<document title="&#33328;">test</document>'
1028 >>> serialize(e)
1029 '<document title="\u8230">test</document>'
1030
1031 2) bad entities
1032
1033 >>> normalize_exception(ET.XML, "<document>&entity;</document>")
1034 Traceback (most recent call last):
1035 ParseError: undefined entity: line 1, column 10
1036
1037 >>> normalize_exception(ET.XML, ENTITY_XML)
1038 Traceback (most recent call last):
1039 ParseError: undefined entity &entity;: line 5, column 10
1040
1041 3) custom entity
1042
1043 >>> parser = ET.XMLParser()
1044 >>> parser.entity["entity"] = "text"
1045 >>> parser.feed(ENTITY_XML)
1046 >>> root = parser.close()
1047 >>> serialize(root)
1048 '<document>text</document>'
1049 """
1050
def error(xml):
    """

    Test error handling.

    Parse *xml* and hand back the ET.ParseError it raises, so callers can
    inspect attributes such as ``position``.  None is returned when the
    input parses cleanly.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Return the exception object itself rather than re-raising it.
        return exc
    return None
1070
1071def namespace():
1072 """
1073 Test namespace issues.
1074
1075 1) xml namespace
1076
1077 >>> elem = ET.XML("<tag xml:lang='en' />")
1078 >>> serialize(elem) # 1.1
1079 '<tag xml:lang="en" />'
1080
1081 2) other "well-known" namespaces
1082
1083 >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
1084 >>> serialize(elem) # 2.1
1085 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'
1086
1087 >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
1088 >>> serialize(elem) # 2.2
1089 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'
1090
1091 >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
1092 >>> serialize(elem) # 2.3
1093 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'
1094
1095 3) unknown namespaces
1096 >>> elem = ET.XML(SAMPLE_XML_NS)
1097 >>> print(serialize(elem))
1098 <ns0:body xmlns:ns0="http://effbot.org/ns">
1099 <ns0:tag>text</ns0:tag>
1100 <ns0:tag />
1101 <ns0:section>
1102 <ns0:tag>subtext</ns0:tag>
1103 </ns0:section>
1104 </ns0:body>
1105 """
1106
1107def qname():
1108 """
1109 Test QName handling.
1110
1111 1) decorated tags
1112
1113 >>> elem = ET.Element("{uri}tag")
1114 >>> serialize(elem) # 1.1
1115 '<ns0:tag xmlns:ns0="uri" />'
1116 >>> elem = ET.Element(ET.QName("{uri}tag"))
1117 >>> serialize(elem) # 1.2
1118 '<ns0:tag xmlns:ns0="uri" />'
1119 >>> elem = ET.Element(ET.QName("uri", "tag"))
1120 >>> serialize(elem) # 1.3
1121 '<ns0:tag xmlns:ns0="uri" />'
Senthil Kumaranec30b3d2010-11-09 02:36:59 +00001122 >>> elem = ET.Element(ET.QName("uri", "tag"))
1123 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
1124 >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
1125 >>> serialize(elem) # 1.4
1126 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127
1128 2) decorated attributes
1129
1130 >>> elem.clear()
1131 >>> elem.attrib["{uri}key"] = "value"
1132 >>> serialize(elem) # 2.1
1133 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
1134
1135 >>> elem.clear()
1136 >>> elem.attrib[ET.QName("{uri}key")] = "value"
1137 >>> serialize(elem) # 2.2
1138 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
1139
1140 3) decorated values are not converted by default, but the
1141 QName wrapper can be used for values
1142
1143 >>> elem.clear()
1144 >>> elem.attrib["{uri}key"] = "{uri}value"
1145 >>> serialize(elem) # 3.1
1146 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'
1147
1148 >>> elem.clear()
1149 >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
1150 >>> serialize(elem) # 3.2
1151 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'
1152
1153 >>> elem.clear()
1154 >>> subelem = ET.Element("tag")
1155 >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
1156 >>> elem.append(subelem)
1157 >>> elem.append(subelem)
1158 >>> serialize(elem) # 3.3
1159 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'
1160
1161 4) Direct QName tests
1162
1163 >>> str(ET.QName('ns', 'tag'))
1164 '{ns}tag'
1165 >>> str(ET.QName('{ns}tag'))
1166 '{ns}tag'
1167 >>> q1 = ET.QName('ns', 'tag')
1168 >>> q2 = ET.QName('ns', 'tag')
1169 >>> q1 == q2
1170 True
1171 >>> q2 = ET.QName('ns', 'other-tag')
1172 >>> q1 == q2
1173 False
1174 >>> q1 == 'ns:tag'
1175 False
1176 >>> q1 == '{ns}tag'
1177 True
1178 """
1179
1180def doctype_public():
1181 """
1182 Test PUBLIC doctype.
1183
1184 >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
1185 ... ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1186 ... ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1187 ... '<html>text</html>')
1188
1189 """
1190
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Each (op, tag) pair produced by ElementPath.xpath_tokenizer is reduced
    to a single string: the operator when it is non-empty, else the tag.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # Exactly one side of each pair is meaningful; prefer the operator.
    return [operator or name
            for operator, name in ElementPath.xpath_tokenizer(p)]
1244
1245def processinginstruction():
1246 """
1247 Test ProcessingInstruction directly
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001248
1249 >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001250 b'<?test instruction?>'
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001251 >>> ET.tostring(ET.PI('test', 'instruction'))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001252 b'<?test instruction?>'
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001253
1254 Issue #2746
1255
1256 >>> ET.tostring(ET.PI('test', '<testing&>'))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001257 b'<?test <testing&>?>'
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001258 >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1')
1259 b"<?xml version='1.0' encoding='latin-1'?>\\n<?test <testing&>\\xe3?>"
Antoine Pitrouc77dd322010-02-09 16:51:16 +00001260 """
Fredrik Lundh8911ca3d2005-12-16 22:07:17 +00001261
Armin Rigo9ed73062005-12-14 18:10:45 +00001262#
1263# xinclude tests (samples from appendix C of the xinclude specification)
1264
1265XINCLUDE = {}
1266
1267XINCLUDE["C1.xml"] = """\
1268<?xml version='1.0'?>
1269<document xmlns:xi="http://www.w3.org/2001/XInclude">
1270 <p>120 Mz is adequate for an average home user.</p>
1271 <xi:include href="disclaimer.xml"/>
1272</document>
1273"""
1274
1275XINCLUDE["disclaimer.xml"] = """\
1276<?xml version='1.0'?>
1277<disclaimer>
1278 <p>The opinions represented herein represent those of the individual
1279 and should not be interpreted as official policy endorsed by this
1280 organization.</p>
1281</disclaimer>
1282"""
1283
1284XINCLUDE["C2.xml"] = """\
1285<?xml version='1.0'?>
1286<document xmlns:xi="http://www.w3.org/2001/XInclude">
1287 <p>This document has been accessed
1288 <xi:include href="count.txt" parse="text"/> times.</p>
1289</document>
1290"""
1291
1292XINCLUDE["count.txt"] = "324387"
1293
Florent Xiclunaba8a9862010-08-08 23:08:41 +00001294XINCLUDE["C2b.xml"] = """\
1295<?xml version='1.0'?>
1296<document xmlns:xi="http://www.w3.org/2001/XInclude">
1297 <p>This document has been <em>accessed</em>
1298 <xi:include href="count.txt" parse="text"/> times.</p>
1299</document>
1300"""
1301
Armin Rigo9ed73062005-12-14 18:10:45 +00001302XINCLUDE["C3.xml"] = """\
1303<?xml version='1.0'?>
1304<document xmlns:xi="http://www.w3.org/2001/XInclude">
1305 <p>The following is the source of the "data.xml" resource:</p>
1306 <example><xi:include href="data.xml" parse="text"/></example>
1307</document>
1308"""
1309
1310XINCLUDE["data.xml"] = """\
1311<?xml version='1.0'?>
1312<data>
1313 <item><![CDATA[Brooks & Shields]]></item>
1314</data>
1315"""
1316
1317XINCLUDE["C5.xml"] = """\
1318<?xml version='1.0'?>
1319<div xmlns:xi="http://www.w3.org/2001/XInclude">
1320 <xi:include href="example.txt" parse="text">
1321 <xi:fallback>
1322 <xi:include href="fallback-example.txt" parse="text">
1323 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
1324 </xi:include>
1325 </xi:fallback>
1326 </xi:include>
1327</div>
1328"""
1329
1330XINCLUDE["default.xml"] = """\
1331<?xml version='1.0'?>
1332<document xmlns:xi="http://www.w3.org/2001/XInclude">
1333 <p>Example.</p>
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001334 <xi:include href="{}"/>
Armin Rigo9ed73062005-12-14 18:10:45 +00001335</document>
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001336""".format(html.escape(SIMPLE_XMLFILE, True))
Armin Rigo9ed73062005-12-14 18:10:45 +00001337
def xinclude_loader(href, parse="xml", encoding=None):
    """Resolve *href* against the in-memory XINCLUDE sample table.

    Returns the parsed element when *parse* is "xml", otherwise the raw
    sample text.  Raises IOError("resource not found") when *href* is not
    a key of XINCLUDE.  The *encoding* argument is accepted but unused.
    """
    try:
        resource = XINCLUDE[href]
    except KeyError:
        raise IOError("resource not found")
    # Anything other than "xml" is handed back untouched.
    if parse != "xml":
        return resource
    from xml.etree.ElementTree import XML
    return XML(resource)
1347
1348def xinclude():
1349 r"""
1350 Basic inclusion example (XInclude C.1)
1351
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001352 >>> from xml.etree import ElementTree as ET
1353 >>> from xml.etree import ElementInclude
Armin Rigo9ed73062005-12-14 18:10:45 +00001354
1355 >>> document = xinclude_loader("C1.xml")
1356 >>> ElementInclude.include(document, xinclude_loader)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001357 >>> print(serialize(document)) # C1
Armin Rigo9ed73062005-12-14 18:10:45 +00001358 <document>
1359 <p>120 Mz is adequate for an average home user.</p>
1360 <disclaimer>
1361 <p>The opinions represented herein represent those of the individual
1362 and should not be interpreted as official policy endorsed by this
1363 organization.</p>
1364 </disclaimer>
1365 </document>
1366
1367 Textual inclusion example (XInclude C.2)
1368
1369 >>> document = xinclude_loader("C2.xml")
1370 >>> ElementInclude.include(document, xinclude_loader)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001371 >>> print(serialize(document)) # C2
Armin Rigo9ed73062005-12-14 18:10:45 +00001372 <document>
1373 <p>This document has been accessed
1374 324387 times.</p>
1375 </document>
1376
Florent Xiclunaba8a9862010-08-08 23:08:41 +00001377 Textual inclusion after sibling element (based on modified XInclude C.2)
1378
1379 >>> document = xinclude_loader("C2b.xml")
1380 >>> ElementInclude.include(document, xinclude_loader)
1381 >>> print(serialize(document)) # C2b
1382 <document>
1383 <p>This document has been <em>accessed</em>
1384 324387 times.</p>
1385 </document>
1386
Armin Rigo9ed73062005-12-14 18:10:45 +00001387 Textual inclusion of XML example (XInclude C.3)
1388
1389 >>> document = xinclude_loader("C3.xml")
1390 >>> ElementInclude.include(document, xinclude_loader)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001391 >>> print(serialize(document)) # C3
Armin Rigo9ed73062005-12-14 18:10:45 +00001392 <document>
1393 <p>The following is the source of the "data.xml" resource:</p>
1394 <example>&lt;?xml version='1.0'?&gt;
1395 &lt;data&gt;
1396 &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
1397 &lt;/data&gt;
1398 </example>
1399 </document>
1400
1401 Fallback example (XInclude C.5)
1402 Note! Fallback support is not yet implemented
1403
1404 >>> document = xinclude_loader("C5.xml")
1405 >>> ElementInclude.include(document, xinclude_loader)
1406 Traceback (most recent call last):
1407 IOError: resource not found
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001408 >>> # print(serialize(document)) # C5
1409 """
1410
1411def xinclude_default():
1412 """
1413 >>> from xml.etree import ElementInclude
1414
1415 >>> document = xinclude_loader("default.xml")
1416 >>> ElementInclude.include(document)
1417 >>> print(serialize(document)) # default
1418 <document>
1419 <p>Example.</p>
1420 <root>
1421 <element key="value">text</element>
1422 <element>text</element>tail
1423 <empty-element />
1424 </root>
1425 </document>
1426 """
1427
1428#
1429# badly formatted xi:include tags
1430
1431XINCLUDE_BAD = {}
1432
1433XINCLUDE_BAD["B1.xml"] = """\
1434<?xml version='1.0'?>
1435<document xmlns:xi="http://www.w3.org/2001/XInclude">
1436 <p>120 Mz is adequate for an average home user.</p>
1437 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
1438</document>
1439"""
1440
1441XINCLUDE_BAD["B2.xml"] = """\
1442<?xml version='1.0'?>
1443<div xmlns:xi="http://www.w3.org/2001/XInclude">
1444 <xi:fallback></xi:fallback>
1445</div>
1446"""
1447
1448def xinclude_failures():
1449 r"""
1450 Test failure to locate included XML file.
1451
1452 >>> from xml.etree import ElementInclude
1453
1454 >>> def none_loader(href, parser, encoding=None):
1455 ... return None
1456
1457 >>> document = ET.XML(XINCLUDE["C1.xml"])
1458 >>> ElementInclude.include(document, loader=none_loader)
1459 Traceback (most recent call last):
1460 xml.etree.ElementInclude.FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'
1461
1462 Test failure to locate included text file.
1463
1464 >>> document = ET.XML(XINCLUDE["C2.xml"])
1465 >>> ElementInclude.include(document, loader=none_loader)
1466 Traceback (most recent call last):
1467 xml.etree.ElementInclude.FatalIncludeError: cannot load 'count.txt' as 'text'
1468
1469 Test bad parse type.
1470
1471 >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
1472 >>> ElementInclude.include(document, loader=none_loader)
1473 Traceback (most recent call last):
1474 xml.etree.ElementInclude.FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')
1475
1476 Test xi:fallback outside xi:include.
1477
1478 >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
1479 >>> ElementInclude.include(document, loader=none_loader)
1480 Traceback (most recent call last):
1481 xml.etree.ElementInclude.FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
1482 """
1483
1484# --------------------------------------------------------------------
1485# reported bugs
1486
1487def bug_xmltoolkit21():
1488 """
1489
1490 marshaller gives obscure errors for non-string values
1491
1492 >>> elem = ET.Element(123)
1493 >>> serialize(elem) # tag
1494 Traceback (most recent call last):
1495 TypeError: cannot serialize 123 (type int)
1496 >>> elem = ET.Element("elem")
1497 >>> elem.text = 123
1498 >>> serialize(elem) # text
1499 Traceback (most recent call last):
1500 TypeError: cannot serialize 123 (type int)
1501 >>> elem = ET.Element("elem")
1502 >>> elem.tail = 123
1503 >>> serialize(elem) # tail
1504 Traceback (most recent call last):
1505 TypeError: cannot serialize 123 (type int)
1506 >>> elem = ET.Element("elem")
1507 >>> elem.set(123, "123")
1508 >>> serialize(elem) # attribute key
1509 Traceback (most recent call last):
1510 TypeError: cannot serialize 123 (type int)
1511 >>> elem = ET.Element("elem")
1512 >>> elem.set("123", 123)
1513 >>> serialize(elem) # attribute value
1514 Traceback (most recent call last):
1515 TypeError: cannot serialize 123 (type int)
Armin Rigo9ed73062005-12-14 18:10:45 +00001516
1517 """
1518
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001519def bug_xmltoolkit25():
1520 """
1521
1522 typo in ElementTree.findtext
1523
1524 >>> elem = ET.XML(SAMPLE_XML)
1525 >>> tree = ET.ElementTree(elem)
1526 >>> tree.findtext("tag")
1527 'text'
1528 >>> tree.findtext("section/tag")
1529 'subtext'
1530
1531 """
1532
1533def bug_xmltoolkit28():
1534 """
1535
1536 .//tag causes exceptions
1537
1538 >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
1539 >>> summarize_list(tree.findall(".//thead"))
1540 []
1541 >>> summarize_list(tree.findall(".//tbody"))
1542 ['tbody']
1543
1544 """
1545
1546def bug_xmltoolkitX1():
1547 """
1548
1549 dump() doesn't flush the output buffer
1550
1551 >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
1552 >>> ET.dump(tree); print("tail")
1553 <doc><table><tbody /></table></doc>
1554 tail
1555
1556 """
1557
1558def bug_xmltoolkit39():
1559 """
1560
1561 non-ascii element and attribute names doesn't work
1562
1563 >>> tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\\xe4g />")
1564 >>> ET.tostring(tree, "utf-8")
1565 b'<t\\xc3\\xa4g />'
1566
1567 >>> tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><tag \\xe4ttr='v&#228;lue' />")
1568 >>> tree.attrib
1569 {'\\xe4ttr': 'v\\xe4lue'}
1570 >>> ET.tostring(tree, "utf-8")
1571 b'<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'
1572
1573 >>> tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\\xe4g>text</t\\xe4g>")
1574 >>> ET.tostring(tree, "utf-8")
1575 b'<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'
1576
1577 >>> tree = ET.Element("t\u00e4g")
1578 >>> ET.tostring(tree, "utf-8")
1579 b'<t\\xc3\\xa4g />'
1580
1581 >>> tree = ET.Element("tag")
1582 >>> tree.set("\u00e4ttr", "v\u00e4lue")
1583 >>> ET.tostring(tree, "utf-8")
1584 b'<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'
1585
1586 """
1587
1588def bug_xmltoolkit54():
1589 """
1590
1591 problems handling internally defined entities
1592
1593 >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
1594 >>> serialize(e, encoding="us-ascii")
1595 b'<doc>&#33328;</doc>'
1596 >>> serialize(e)
1597 '<doc>\u8230</doc>'
1598
1599 """
1600
1601def bug_xmltoolkit55():
1602 """
1603
1604 make sure we're reporting the first error, not the last
1605
1606 >>> normalize_exception(ET.XML, b"<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
1607 Traceback (most recent call last):
1608 ParseError: undefined entity &ldots;: line 1, column 36
1609
1610 """
1611
class ExceptionFile:
    """File-like stub whose read() always fails with IOError."""

    def read(self, x):
        """Raise IOError regardless of the requested size *x*."""
        raise IOError()
1615
1616def xmltoolkit60():
1617 """
1618
1619 Handle crash in stream source.
1620 >>> tree = ET.parse(ExceptionFile())
1621 Traceback (most recent call last):
1622 IOError
1623
1624 """
1625
1626XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
1627<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
1628<patent-application-publication>
1629<subdoc-abstract>
1630<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
1631</subdoc-abstract>
1632</patent-application-publication>"""
1633
1634
def xmltoolkit62():
    """

    Don't crash when using custom entities.

    Feeds XMLTOOLKIT62_DOC to a parser whose entity table has been
    extended with the two quote entities the document uses, and returns
    the text of the first <paragraph> element.

    >>> xmltoolkit62()
    'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    entities = {'rsquo': '\u2019', 'lsquo': '\u2018'}
    # Use XMLParser, as the other tests in this file do; XMLTreeBuilder is
    # only a deprecated alias for it.
    parser = ET.XMLParser()
    parser.entity.update(entities)
    parser.feed(XMLTOOLKIT62_DOC)
    root = parser.close()
    return root.find('.//paragraph').text
1650
def xmltoolkit63():
    """

    Check reference leak.
    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    # Drive a TreeBuilder through one start/data/end cycle; the doctest
    # above compares the refcount of None before and after many calls.
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1667
1668# --------------------------------------------------------------------
1669
1670
1671def bug_200708_newline():
1672 r"""
1673
1674 Preserve newlines in attributes.
1675
1676 >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
1677 >>> ET.tostring(e)
Florent Xiclunac17f1722010-08-08 19:48:29 +00001678 b'<SomeTag text="def _f():&#10; return 3&#10;" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001679 >>> ET.XML(ET.tostring(e)).get("text")
1680 'def _f():\n return 3\n'
1681 >>> ET.tostring(ET.XML(ET.tostring(e)))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001682 b'<SomeTag text="def _f():&#10; return 3&#10;" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001683
1684 """
1685
1686def bug_200708_close():
1687 """
1688
1689 Test default builder.
1690 >>> parser = ET.XMLParser() # default
1691 >>> parser.feed("<element>some text</element>")
1692 >>> summarize(parser.close())
1693 'element'
1694
1695 Test custom builder.
1696 >>> class EchoTarget:
1697 ... def close(self):
1698 ... return ET.Element("element") # simulate root
1699 >>> parser = ET.XMLParser(EchoTarget())
1700 >>> parser.feed("<element>some text</element>")
1701 >>> summarize(parser.close())
1702 'element'
1703
1704 """
1705
1706def bug_200709_default_namespace():
1707 """
1708
1709 >>> e = ET.Element("{default}elem")
1710 >>> s = ET.SubElement(e, "{default}elem")
1711 >>> serialize(e, default_namespace="default") # 1
1712 '<elem xmlns="default"><elem /></elem>'
1713
1714 >>> e = ET.Element("{default}elem")
1715 >>> s = ET.SubElement(e, "{default}elem")
1716 >>> s = ET.SubElement(e, "{not-default}elem")
1717 >>> serialize(e, default_namespace="default") # 2
1718 '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'
1719
1720 >>> e = ET.Element("{default}elem")
1721 >>> s = ET.SubElement(e, "{default}elem")
1722 >>> s = ET.SubElement(e, "elem") # unprefixed name
1723 >>> serialize(e, default_namespace="default") # 3
1724 Traceback (most recent call last):
1725 ValueError: cannot use non-qualified names with default_namespace option
1726
1727 """
1728
1729def bug_200709_register_namespace():
1730 """
1731
1732 >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001733 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001734 >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
1735 >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001736 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001737
1738 And the Dublin Core namespace is in the default list:
1739
1740 >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
Florent Xiclunac17f1722010-08-08 19:48:29 +00001741 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742
1743 """
1744
1745def bug_200709_element_comment():
1746 """
1747
1748 Not sure if this can be fixed, really (since the serializer needs
1749 ET.Comment, not cET.comment).
1750
1751 >>> a = ET.Element('a')
1752 >>> a.append(ET.Comment('foo'))
1753 >>> a[0].tag == ET.Comment
1754 True
1755
1756 >>> a = ET.Element('a')
1757 >>> a.append(ET.PI('foo'))
1758 >>> a[0].tag == ET.PI
1759 True
1760
1761 """
1762
1763def bug_200709_element_insert():
1764 """
1765
1766 >>> a = ET.Element('a')
1767 >>> b = ET.SubElement(a, 'b')
1768 >>> c = ET.SubElement(a, 'c')
1769 >>> d = ET.Element('d')
1770 >>> a.insert(0, d)
1771 >>> summarize_list(a)
1772 ['d', 'b', 'c']
1773 >>> a.insert(-1, d)
1774 >>> summarize_list(a)
1775 ['d', 'b', 'd', 'c']
1776
1777 """
1778
1779def bug_200709_iter_comment():
1780 """
1781
1782 >>> a = ET.Element('a')
1783 >>> b = ET.SubElement(a, 'b')
1784 >>> comment_b = ET.Comment("TEST-b")
1785 >>> b.append(comment_b)
1786 >>> summarize_list(a.iter(ET.Comment))
1787 ['<Comment>']
1788
1789 """
1790
1791# --------------------------------------------------------------------
1792# reported on bugs.python.org
1793
1794def bug_1534630():
1795 """
1796
1797 >>> bob = ET.TreeBuilder()
1798 >>> e = bob.data("data")
1799 >>> e = bob.start("tag", {})
1800 >>> e = bob.end("tag")
1801 >>> e = bob.close()
1802 >>> serialize(e)
1803 '<tag />'
1804
1805 """
1806
1807def check_issue6233():
1808 """
1809
1810 >>> e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
1811 >>> ET.tostring(e, 'ascii')
1812 b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
1813 >>> e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
1814 >>> ET.tostring(e, 'ascii')
1815 b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
1816
1817 """
1818
1819def check_issue3151():
1820 """
1821
1822 >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
1823 >>> e.tag
1824 '{${stuff}}localname'
1825 >>> t = ET.ElementTree(e)
1826 >>> ET.tostring(e)
Florent Xiclunac17f1722010-08-08 19:48:29 +00001827 b'<ns0:localname xmlns:ns0="${stuff}" />'
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001828
1829 """
1830
1831def check_issue6565():
1832 """
1833
1834 >>> elem = ET.XML("<body><tag/></body>")
1835 >>> summarize_list(elem)
1836 ['tag']
1837 >>> newelem = ET.XML(SAMPLE_XML)
1838 >>> elem[:] = newelem[:]
1839 >>> summarize_list(elem)
1840 ['tag', 'tag', 'section']
1841
1842 """
1843
Georg Brandl90b20672010-12-28 10:38:33 +00001844def check_issue10777():
1845 """
1846 Registering a namespace twice caused a "dictionary changed size during
1847 iteration" bug.
1848
1849 >>> ET.register_namespace('test10777', 'http://myuri/')
1850 >>> ET.register_namespace('test10777', 'http://myuri/')
1851 """
1852
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853# --------------------------------------------------------------------
1854
1855
class CleanContext:
    """Provide default namespace mapping and path cache.

    On entry, ElementTree's namespace map and ElementPath's cache are
    replaced by copies and the warning checker built in __init__ is
    entered.  On exit the original objects are put back and the warning
    checker is exited.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        """Build the warning checker; *quiet* is forwarded to it."""
        if sys.flags.optimize >= 2:
            # under -OO, doctests cannot be run and therefore not all warnings
            # will be emitted
            quiet = True
        # Shared by the DeprecationWarning and PendingDeprecationWarning
        # filters below.
        removed_method = ("This method will be removed in future versions. "
                          "Use .+ instead.")
        expected = [
            # Search behaviour is broken if search path starts with "/".
            ("This search is broken in 1.3 and earlier, and will be fixed "
             "in a future version. If you rely on the current behaviour, "
             "change it to '.+'", FutureWarning),
            # Element.getchildren() and Element.getiterator() are deprecated.
            (removed_method, DeprecationWarning),
            (removed_method, PendingDeprecationWarning),
            # XMLParser.doctype() is deprecated.
            ("This method of XMLParser is deprecated. Define doctype.. "
             "method on the TreeBuilder target.", DeprecationWarning),
        ]
        self.checkwarnings = support.check_warnings(*expected, quiet=quiet)

    def __enter__(self):
        """Swap in copies of the shared state, then start checking warnings."""
        from xml.etree import ElementTree
        element_path = ElementTree.ElementPath
        # Remember the original objects so __exit__ can restore them.
        self._nsmap = ElementTree._namespace_map
        self._path_cache = element_path._cache
        # Copy the default namespace mapping
        ElementTree._namespace_map = self._nsmap.copy()
        # Copy the path cache (should be empty)
        element_path._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()

    def __exit__(self, *args):
        """Restore the saved state, then finish the warning check."""
        from xml.etree import ElementTree
        # Restore mapping and path cache
        ElementTree._namespace_map, ElementTree.ElementPath._cache = (
            self._nsmap, self._path_cache)
        self.checkwarnings.__exit__(*args)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896
1897
def test_main(module_name='xml.etree.ElementTree'):
    """Run this module's doctests against the implementation *module_name*.

    The module-level ET symbol of test.test_xml_etree must already refer
    to *module_name*; this is asserted before and after the run.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    not_python_module = (module_name != 'xml.etree.ElementTree')
    with CleanContext(quiet=not_python_module):
        support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name
Armin Rigo9ed73062005-12-14 18:10:45 +00001912
# Run the doctests with the default module name when executed as a script.
if __name__ == '__main__':
    test_main()