blob: 201266a12d9544e273305ed5a5770bbb35975ff2 [file] [log] [blame]
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001# -*- coding: utf-8 -*-
2# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
3# to ensure consistency between the C implementation and the Python
4# implementation.
Florent Xicluna3e8c1892010-03-11 14:36:19 +00005#
6# For this purpose, the module-level "ET" symbol is temporarily
7# monkey-patched when running the "test_xml_etree_c" test suite.
Armin Rigo9ed73062005-12-14 18:10:45 +00008
Florent Xicluna26cc99d2010-03-31 21:21:54 +00009import cgi
Serhiy Storchaka68903b62017-04-02 16:55:43 +030010import copy
11import io
12import pickle
13import StringIO
14import sys
15import types
16import unittest
17import warnings
18import weakref
Armin Rigo9ed73062005-12-14 18:10:45 +000019
Serhiy Storchaka68903b62017-04-02 16:55:43 +030020from test import test_support as support
21from test.test_support import TESTFN, findfile, gc_collect, swap_attr
Armin Rigo9ed73062005-12-14 18:10:45 +000022
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.  It is initialised to None here, so it has to be
# rebound to a concrete implementation before any test below touches it.
from xml.etree import ElementTree as pyET
ET = None
Florent Xicluna3e8c1892010-03-11 14:36:19 +000029
# Paths of the on-disk fixtures, located through findfile() in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# Small document without namespaces: two <tag> children and a <section>
# wrapping a third <tag>.
SAMPLE_XML = """\
<body>
 <tag class='a'>text</tag>
 <tag class='b' />
 <section>
 <tag class='b' id='inner'>subtext</tag>
 </section>
</body>
"""

# A standalone <section> fragment with a few sibling/nested elements.
SAMPLE_SECTION = """\
<section>
 <tag class='b' id='inner'>subtext</tag>
 <nexttag />
 <nextsection>
 <tag />
 </nextsection>
</section>
"""

# Same layout as SAMPLE_XML but inside a default namespace.  There is no
# backslash after the opening quotes, so the literal starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
 <tag>text</tag>
 <tag />
 <section>
 <tag>subtext</tag>
 </section>
</body>
"""

# Two sibling tables bound to different namespace prefixes ("h" and "f").
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
 <h:tr>
 <h:td>Apples</h:td>
 <h:td>Bananas</h:td>
 </h:tr>
</h:table>

<f:table xmlns:f="foo">
 <f:name>African Coffee Table</f:name>
 <f:width>80</f:width>
 <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DTD pulls in an external parameter entity and whose body
# references &entity;, which the internal subset itself does not declare.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
87
Florent Xicluna3e8c1892010-03-11 14:36:19 +000088
class ModuleTest(unittest.TestCase):
    # TODO: this should be removed once we get rid of the global module vars

    def test_sanity(self):
        # Import sanity: the three xml.etree submodules must be importable.
        # The names are intentionally unused; only the import matters.
        from xml.etree import ElementTree, ElementInclude, ElementPath
Florent Xicluna3e8c1892010-03-11 14:36:19 +000098
Florent Xicluna3e8c1892010-03-11 14:36:19 +000099
def serialize(elem, to_string=True, **options):
    """Serialize *elem* by writing it through an ElementTree.

    The element is wrapped in ``ET.ElementTree`` and written to an
    in-memory ``StringIO`` buffer; *options* are forwarded unchanged to
    ``ElementTree.write()``.

    Returns the buffer contents when *to_string* is true, otherwise the
    buffer object itself, rewound to position 0.
    """
    buf = StringIO.StringIO()
    ET.ElementTree(elem).write(buf, **options)
    if not to_string:
        buf.seek(0)
        return buf
    return buf.getvalue()
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000109
def summarize_list(seq):
    """Return a list holding the ``tag`` of each item of *seq*, in order."""
    tags = []
    for item in seq:
        tags.append(item.tag)
    return tags
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000112
def normalize_crlf(tree):
    """Replace every "\\r\\n" with "\\n" in the text and tail of each element.

    *tree* only needs an ``iter()`` method yielding elements; the elements
    are modified in place and nothing is returned.  Empty or missing
    text/tail values are left untouched.
    """
    for node in tree.iter():
        for attr in ("text", "tail"):
            value = getattr(node, attr)
            if value:
                setattr(node, attr, value.replace("\r\n", "\n"))
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000119
def python_only(test):
    """Decorator that skips *test* unless ET is the pure-Python module.

    The returned wrapper raises ``unittest.SkipTest`` when the module-level
    ``ET`` is not ``pyET``; otherwise it calls *test* with the arguments it
    was given and returns its result.

    ``functools.wraps`` keeps the wrapped test's ``__name__`` and
    ``__doc__`` so that reports and introspection show the real test
    rather than "wrapper"; keyword arguments are forwarded as well.
    """
    # Imported locally so the decorator does not depend on the module's
    # top-level import list.
    import functools

    @functools.wraps(test)
    def wrapper(*args, **kwargs):
        # ET is looked up at call time because it is rebound after import.
        if ET is not pyET:
            raise unittest.SkipTest('only for the Python version')
        return test(*args, **kwargs)
    return wrapper
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000126
Serhiy Storchaka68903b62017-04-02 16:55:43 +0300127# --------------------------------------------------------------------
128# element tree tests
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000129
Serhiy Storchaka68903b62017-04-02 16:55:43 +0300130class ElementTreeTest(unittest.TestCase):
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000131
def serialize_check(self, elem, expected):
    # Serialize elem with the module-level serialize() helper (default
    # options) and require the output to equal expected.
    actual = serialize(elem)
    self.assertEqual(actual, expected)
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000134
def test_interface(self):
    # Test element tree interface: string-like tag/text/tail, a
    # mapping-like attrib, the standard element methods, iterator-returning
    # methods and the module-level aliases.

    def check_string(string):
        # Exercise len(), iteration (each item must have length 1),
        # concatenation and slicing; only the length check is asserted,
        # the other operations merely have to not raise.
        len(string)
        for char in string:
            self.assertEqual(len(char), 1,
                    msg="expected one-character string, got %r" % char)
        new_string = string + ""
        new_string = string + " "
        string[:0]

    def check_mapping(mapping):
        # Exercise len(), keys(), items() and item access, then store a
        # "key" entry and read it back.  Note the side effect: the mapping
        # passed in gains a "key" -> "value" item.
        len(mapping)
        keys = mapping.keys()
        items = mapping.items()
        for key in keys:
            item = mapping[key]
        mapping["key"] = "value"
        self.assertEqual(mapping["key"], "value",
                msg="expected value string, got %r" % mapping["key"])

    def check_element(element):
        # Verify one element and then recurse into its children.
        self.assertTrue(ET.iselement(element), msg="not an element")
        self.assertTrue(hasattr(element, "tag"), msg="no tag member")
        self.assertTrue(hasattr(element, "attrib"), msg="no attrib member")
        self.assertTrue(hasattr(element, "text"), msg="no text member")
        self.assertTrue(hasattr(element, "tail"), msg="no tail member")

        check_string(element.tag)
        check_mapping(element.attrib)
        # text and tail may legitimately be None; only check real values.
        if element.text is not None:
            check_string(element.text)
        if element.tail is not None:
            check_string(element.tail)
        for elem in element:
            check_element(elem)

    element = ET.Element("tag")
    check_element(element)
    tree = ET.ElementTree(element)
    check_element(tree.getroot())
    # The repr of an element with a non-ASCII tag must show the escaped form.
    element = ET.Element("t\xe4g", key="value")
    tree = ET.ElementTree(element)
    self.assertRegexpMatches(repr(element), r"^<Element 't\\xe4g' at 0x.*>$")
    # `tree` keeps wrapping the previous element; `element` is rebound for
    # the method checks below.
    element = ET.Element("tag", key="value")

    # Make sure all standard element methods exist.

    def check_method(method):
        # A method counts as present when the attribute is callable.
        self.assertTrue(hasattr(method, '__call__'),
                msg="%s not callable" % method)

    check_method(element.append)
    check_method(element.extend)
    check_method(element.insert)
    check_method(element.remove)
    check_method(element.getchildren)
    check_method(element.find)
    check_method(element.iterfind)
    check_method(element.findall)
    check_method(element.findtext)
    check_method(element.clear)
    check_method(element.get)
    check_method(element.set)
    check_method(element.keys)
    check_method(element.items)
    check_method(element.iter)
    check_method(element.itertext)
    check_method(element.getiterator)

    # These methods return an iterable. See bug 6472.

    def check_iter(it):
        # The returned object must expose a callable next() method.
        check_method(it.next)

    check_iter(element.iter("tag"))
    check_iter(element.iterfind("tag"))
    check_iter(element.iterfind("*"))
    check_iter(tree.iter("tag"))
    check_iter(tree.iterfind("tag"))
    check_iter(tree.iterfind("*"))

    # These aliases are provided:

    self.assertEqual(ET.XML, ET.fromstring)
    self.assertEqual(ET.PI, ET.ProcessingInstruction)
    self.assertEqual(ET.XMLParser, ET.XMLTreeBuilder)
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000223
def test_set_attribute(self):
    # tag, text, tail and attrib must all be plain read/write attributes:
    # whatever is assigned is what is read back, repeatedly.
    element = ET.Element('tag')

    self.assertEqual(element.tag, 'tag')
    for new_tag in ('Tag', 'TAG'):
        element.tag = new_tag
        self.assertEqual(element.tag, new_tag)

    # text is checked completely before tail, and both start out as None.
    string_fields = (
        ('text', ('Text', 'TEXT')),
        ('tail', ('Tail', 'TAIL')),
    )
    for name, values in string_fields:
        self.assertIsNone(getattr(element, name))
        for value in values:
            setattr(element, name, value)
            self.assertEqual(getattr(element, name), value)

    self.assertEqual(element.attrib, {})
    for new_attrib in ({'a': 'b', 'c': 'd'}, {'A': 'B', 'C': 'D'}):
        # Compare against an independent copy, not the assigned object.
        expected = dict(new_attrib)
        element.attrib = new_attrib
        self.assertEqual(element.attrib, expected)
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000250
def test_simpleops(self):
    # Basic method sanity checks: append/insert/extend/remove and slice
    # operations, each verified through the serialized form.

    elem = ET.XML("<body><tag/></body>")
    self.serialize_check(elem, '<body><tag /></body>')
    e = ET.Element("tag2")
    elem.append(e)
    self.serialize_check(elem, '<body><tag /><tag2 /></body>')
    elem.remove(e)
    self.serialize_check(elem, '<body><tag /></body>')
    elem.insert(0, e)
    self.serialize_check(elem, '<body><tag2 /><tag /></body>')
    elem.remove(e)
    elem.extend([e])
    self.serialize_check(elem, '<body><tag /><tag2 /></body>')
    elem.remove(e)

    element = ET.Element("tag", key="value")
    self.serialize_check(element, '<tag key="value" />') # 1
    subelement = ET.Element("subtag")
    element.append(subelement)
    self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
    # The same child object is added a second time; it then shows up twice.
    element.insert(0, subelement)
    self.serialize_check(element,
            '<tag key="value"><subtag /><subtag /></tag>') # 3
    # Each remove() drops one occurrence.
    element.remove(subelement)
    self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
    element.remove(subelement)
    self.serialize_check(element, '<tag key="value" />') # 5
    # Removing a child that is no longer present raises ValueError with the
    # same message list.remove() uses, and leaves the element unchanged.
    with self.assertRaises(ValueError) as cm:
        element.remove(subelement)
    self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
    self.serialize_check(element, '<tag key="value" />') # 6
    # Slice assignment, slice reads (including a stride) and slice deletion.
    element[0:0] = [subelement, subelement, subelement]
    self.serialize_check(element[1], '<subtag />')
    self.assertEqual(element[1:9], [element[1], element[2]])
    self.assertEqual(element[:9:2], [element[0], element[2]])
    del element[1:2]
    self.serialize_check(element,
            '<tag key="value"><subtag /><subtag /></tag>')
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000291
def test_cdata(self):
    # Test CDATA handling (etc): literal text, numeric character
    # references and a CDATA section must all serialize to the same output.
    expected = '<tag>hello</tag>'
    sources = (
        "<tag>hello</tag>",
        "<tag>&#104;&#101;&#108;&#108;&#111;</tag>",
        "<tag><![CDATA[hello]]></tag>",
    )
    for source in sources:
        self.serialize_check(ET.XML(source), expected)
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000301
def test_file_init(self):
    # ElementTree(file=...) must accept a file-like object as well as a
    # file name.
    encoded = SAMPLE_XML.encode("utf-8")
    from_buffer = ET.ElementTree(file=StringIO.StringIO(encoded))
    for path in ("tag", "section/tag"):
        self.assertEqual(from_buffer.find(path).tag, 'tag')

    from_name = ET.ElementTree(file=SIMPLE_XMLFILE)
    found = from_name.find("element")
    self.assertEqual(found.tag, 'element')
    found = from_name.find("element/../empty-element")
    self.assertEqual(found.tag, 'empty-element')
Florent Xicluna3e8c1892010-03-11 14:36:19 +0000312
def test_path_cache(self):
    # Check that the path cache behaves sanely: repeated lookups do not
    # grow it, new lookups do, and it stays below 500 entries.

    from xml.etree import ElementPath

    root = ET.XML(SAMPLE_XML)

    def probe(count):
        # Look up the paths './0' .. './<count-1>' on a fresh tree each time.
        for index in range(count):
            ET.ElementTree(root).find('./' + str(index))

    probe(10)
    baseline = len(ElementPath._cache)
    probe(10)
    self.assertEqual(len(ElementPath._cache), baseline)
    probe(20)
    self.assertGreater(len(ElementPath._cache), baseline)
    probe(600)
    self.assertLess(len(ElementPath._cache), 500)
Antoine Pitrou42fb6ab2010-02-09 17:08:05 +0000327
def test_copy(self):
    # Test copy handling (etc): a shallow copy shares its children with the
    # original, a deep copy does not.

    import copy
    original = ET.XML("<tag>hello<foo/></tag>")
    shallow = copy.copy(original)
    deep = copy.deepcopy(original)

    # Rename the child through the original only.
    original.find("foo").tag = "bar"

    renamed = '<tag>hello<bar /></tag>'
    self.serialize_check(original, renamed)
    self.serialize_check(shallow, renamed)
    self.serialize_check(deep, '<tag>hello<foo /></tag>')
339
def test_attrib(self):
    # Test attribute handling: get()/set(), the different ways of passing
    # attributes to Element(), aliasing of the caller's dict, and how
    # attribute values are serialized.

    elem = ET.Element("tag")
    # A missing key without an explicit default must yield None; the result
    # is asserted so this step actually verifies something.
    self.assertIsNone(elem.get("key")) # 1.1
    self.assertEqual(elem.get("key", "default"), 'default') # 1.2

    elem.set("key", "value")
    self.assertEqual(elem.get("key"), 'value') # 1.3

    elem = ET.Element("tag", key="value")
    self.assertEqual(elem.get("key"), 'value') # 2.1
    self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2

    # Attributes passed as a positional dict: the element must keep its own
    # copy, so clearing the caller's dict afterwards has no effect.
    attrib = {"key": "value"}
    elem = ET.Element("tag", attrib)
    attrib.clear() # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value') # 3.1
    self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2

    # Same check with the dict expanded into keyword arguments.
    attrib = {"key": "value"}
    elem = ET.Element("tag", **attrib)
    attrib.clear() # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value') # 4.1
    self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2

    # A keyword argument overrides the same key in the positional dict.
    elem = ET.Element("tag", {"key": "other"}, key="value")
    self.assertEqual(elem.get("key"), 'value') # 5.1
    self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2

    elem = ET.Element('test')
    elem.text = "aa"
    elem.set('testa', 'testval')
    elem.set('testb', 'test2')
    self.assertEqual(ET.tostring(elem),
            b'<test testa="testval" testb="test2">aa</test>')
    self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
    self.assertEqual(sorted(elem.items()),
            [('testa', 'testval'), ('testb', 'test2')])
    self.assertEqual(elem.attrib['testb'], 'test2')
    # Changes made directly on .attrib must show up in the output too.
    elem.attrib['testb'] = 'test1'
    elem.attrib['testc'] = 'test2'
    self.assertEqual(ET.tostring(elem),
            b'<test testa="testval" testb="test1" testc="test2">aa</test>')

    # Whitespace in attribute values: the expected output has "\n" written
    # as "&#10;" while "\r", "\t" and " " appear literally.
    elem = ET.Element('test')
    elem.set('a', '\r')
    elem.set('b', '\r\n')
    elem.set('c', '\t\n\r ')
    elem.set('d', '\n\n')
    self.assertEqual(ET.tostring(elem),
            b'<test a="\r" b="\r&#10;" c="\t&#10;\r " d="&#10;&#10;" />')
392
def test_makeelement(self):
    # Test makeelement handling, plus clear()/extend() and whole-slice
    # assignment using the element it produced.

    elem = ET.Element("tag")
    attrib = {"key": "value"}
    subelem = elem.makeelement("subtag", attrib)
    # The new element must not share the caller's attrib dict.
    self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
    elem.append(subelem)
    self.serialize_check(elem, '<tag><subtag key="value" /></tag>')

    elem.clear()
    self.serialize_check(elem, '<tag />')
    elem.append(subelem)
    self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
    # The same child object is added twice more and is serialized each time.
    elem.extend([subelem, subelem])
    self.serialize_check(elem,
            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
    # Replacing all children works with a list and with a tuple.
    elem[:] = [subelem]
    self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
    elem[:] = tuple([subelem])
    self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
414
def test_parsefile(self):
    # Test parsing from file, via ET.parse() and via the parser classes
    # fed with the raw file contents.

    expected_plain = (
        '<root>\n'
        ' <element key="value">text</element>\n'
        ' <element>text</element>tail\n'
        ' <empty-element />\n'
        '</root>')
    expected_ns = (
        '<ns0:root xmlns:ns0="namespace">\n'
        ' <ns0:element key="value">text</ns0:element>\n'
        ' <ns0:element>text</ns0:element>tail\n'
        ' <ns0:empty-element />\n'
        '</ns0:root>')

    def parse_and_write(path):
        # Parse the file, normalize line endings and write it back out to
        # an in-memory buffer.
        parsed = ET.parse(path)
        normalize_crlf(parsed)
        out = StringIO.StringIO()
        parsed.write(out)
        return out.getvalue()

    self.assertEqual(parse_and_write(SIMPLE_XMLFILE), expected_plain)
    self.assertEqual(parse_and_write(SIMPLE_NS_XMLFILE), expected_ns)

    with open(SIMPLE_XMLFILE) as f:
        data = f.read()

    parser = ET.XMLParser()
    self.assertRegexpMatches(parser.version, r'^Expat ')
    parser.feed(data)
    self.serialize_check(parser.close(), expected_plain)

    parser = ET.XMLTreeBuilder() # 1.2 compatibility
    parser.feed(data)
    self.serialize_check(parser.close(), expected_plain)

    # Parser with an explicitly supplied TreeBuilder target.
    parser = ET.XMLParser(target=ET.TreeBuilder())
    parser.feed(data)
    self.serialize_check(parser.close(), expected_plain)
470
def test_parseliteral(self):
    # Parse XML from string literals and string lists, round-trip it through
    # tostring()/tostringlist(), and check the id map built by XMLID().
    markup = "<html><body>text</body></html>"

    for parse in (ET.XML, ET.fromstring):
        self.assertEqual(ET.tostring(parse(markup)),
                '<html><body>text</body></html>')

    pieces = ["<html><body>", "text</bo", "dy></html>"]
    element = ET.fromstringlist(pieces)
    self.assertEqual(ET.tostring(element),
            '<html><body>text</body></html>')
    self.assertEqual("".join(ET.tostringlist(element)),
            '<html><body>text</body></html>')
    # An explicit encoding adds an XML declaration line.
    self.assertEqual(ET.tostring(element, "ascii"),
            "<?xml version='1.0' encoding='ascii'?>\n"
            "<html><body>text</body></html>")

    unused_root, ids = ET.XMLID(markup)
    self.assertEqual(len(ids), 0)
    unused_root, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    self.assertEqual(len(ids), 1)
    self.assertEqual(ids["body"].tag, 'body')
492
def test_iterparse(self):
    # Test iterparse interface: default events, explicit event selections,
    # namespace events, invalid event names, encoded input and parse errors.

    iterparse = ET.iterparse

    # With no events argument only "end" events are reported.
    context = iterparse(SIMPLE_XMLFILE)
    action, elem = next(context)
    self.assertEqual((action, elem.tag), ('end', 'element'))
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('end', 'element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])
    self.assertEqual(context.root.tag, 'root')

    context = iterparse(SIMPLE_NS_XMLFILE)
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('end', '{namespace}element'),
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
        ])

    # An empty events tuple yields nothing, whether it is passed
    # positionally or by keyword.
    events = ()
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [])

    events = ()
    context = iterparse(SIMPLE_XMLFILE, events=events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [])

    events = ("start", "end")
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('start', 'root'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'empty-element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])

    # For namespace events the second tuple item is not an element, so it
    # is compared as-is instead of through .tag.
    events = ("start", "end", "start-ns", "end-ns")
    context = iterparse(SIMPLE_NS_XMLFILE, events)
    self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                         else (action, elem)
                      for action, elem in context], [
            ('start-ns', ('', 'namespace')),
            ('start', '{namespace}root'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}empty-element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
            ('end-ns', None),
        ])

    events = ('start-ns', 'end-ns')
    context = iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events)
    res = [(action, elem) for action, elem in context]
    self.assertEqual(res, [('start-ns', ('', '')), ('end-ns', None)])

    # An unknown event name is rejected with ValueError, and a file object
    # supplied by the caller must be left open.
    events = ("start", "end", "bogus")
    with open(SIMPLE_XMLFILE, "rb") as f:
        with self.assertRaises(ValueError) as cm:
            iterparse(f, events)
        self.assertFalse(f.closed)
    self.assertEqual(str(cm.exception), "unknown event 'bogus'")

    # Latin-1 encoded input with non-ASCII characters in a namespace URI
    # (via a character reference) and in a prefix (via a raw byte).
    source = StringIO.StringIO(
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<body xmlns='http://&#233;ffbot.org/ns'\n"
        " xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
    events = ("start-ns",)
    context = iterparse(source, events)
    self.assertEqual([(action, elem) for action, elem in context], [
            ('start-ns', ('', u'http://\xe9ffbot.org/ns')),
            ('start-ns', (u'cl\xe9', 'http://effbot.org/ns')),
        ])

    # Trailing junk: the document element is still delivered first, and the
    # ParseError is raised on the following next() call.
    source = StringIO.StringIO("<document />junk")
    it = iterparse(source)
    action, elem = next(it)
    self.assertEqual((action, elem.tag), ('end', 'document'))
    with self.assertRaises(ET.ParseError) as cm:
        next(it)
    self.assertEqual(str(cm.exception),
            'junk after document element: line 1, column 12')
585
def test_writefile(self):
    # Serialize an element with text and a child, then check what is
    # written when the element's tag is None and when it contains comment
    # and processing-instruction children.
    elem = ET.Element("tag")
    elem.text = "text"
    self.serialize_check(elem, '<tag>text</tag>')
    ET.SubElement(elem, "subtag").text = "subtext"
    self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')

    # Test tag suppression
    # With tag set to None only the element's content is written.
    elem.tag = None
    self.serialize_check(elem, 'text<subtag>subtext</subtag>')
    elem.insert(0, ET.Comment("comment"))
    self.serialize_check(elem,
            'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3

    # Replace the comment (child 0) with a processing instruction.
    elem[0] = ET.PI("key", "value")
    self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
602
def test_custom_builder(self):
    # Test parser w. custom builder: XMLParser must call the target's
    # start/end/data methods and, when the target defines them, pi and
    # comment as well.

    with open(SIMPLE_XMLFILE) as f:
        data = f.read()
    # Minimal target: a list that records start and end events.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            # Character data is deliberately ignored.
            pass
    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
            ('start', 'root'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'empty-element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])

    with open(SIMPLE_NS_XMLFILE) as f:
        data = f.read()
    # Second target (rebinding the name Builder) that also records
    # processing instructions and comments.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            pass
        def pi(self, target, data):
            self.append(("pi", target, data))
        def comment(self, data):
            self.append(("comment", data))
    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
            ('pi', 'pi', 'data'),
            ('comment', ' comment '),
            ('start', '{namespace}root'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}empty-element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
        ])
657
658
def test_getchildren(self):
    # Test Element.getchildren(), both while walking a parsed tree and
    # while children are deleted, reordered and cleared.

    with open(SIMPLE_XMLFILE, "r") as f:
        tree = ET.parse(f)
    # Children of every element in document order: the root has three
    # children, the other elements have none.
    self.assertEqual([summarize_list(elem.getchildren())
                      for elem in tree.getroot().iter()], [
            ['element', 'element', 'empty-element'],
            [],
            [],
            [],
        ])
    # Same expectation when walking through tree.getiterator().
    self.assertEqual([summarize_list(elem.getchildren())
                      for elem in tree.getiterator()], [
            ['element', 'element', 'empty-element'],
            [],
            [],
            [],
        ])

    elem = ET.XML(SAMPLE_XML)
    self.assertEqual(len(elem.getchildren()), 3)
    self.assertEqual(len(elem[2].getchildren()), 1)
    self.assertEqual(elem[:], elem.getchildren())
    child1 = elem[0]
    child2 = elem[2]
    # Deleting the middle child shifts child2 down to index 1.
    del elem[1:2]
    self.assertEqual(len(elem.getchildren()), 2)
    self.assertEqual(child1, elem[0])
    self.assertEqual(child2, elem[1])
    # Slice assignment swaps the two remaining children.
    elem[0:2] = [child2, child1]
    self.assertEqual(child2, elem[0])
    self.assertEqual(child1, elem[1])
    self.assertNotEqual(child1, elem[0])
    elem.clear()
    self.assertEqual(elem.getchildren(), [])
695
def test_writestring(self):
    # tostring() must round-trip a document parsed with either entry point.
    markup = "<html><body>text</body></html>"
    for parse in (ET.XML, ET.fromstring):
        parsed = parse(markup)
        self.assertEqual(ET.tostring(parsed), b'<html><body>text</body></html>')
701
def test_encoding(self):
    # Parse documents whose XML declaration names various encodings and
    # check which encodings are accepted and which errors are raised for
    # the rest.
    def check(encoding, body=''):
        # Encode a document declaring `encoding` with that same codec and
        # verify the decoded text of the root element.
        xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
               (encoding, body))
        self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
    check("ascii", 'a')
    check("us-ascii", 'a')
    check("iso-8859-1", u'\xbd')
    check("iso-8859-15", u'\u20ac')
    check("cp437", u'\u221a')
    check("mac-roman", u'\u02da')

    def xml(encoding):
        # Empty document that declares `encoding`.
        return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    def bxml(encoding):
        # The same document, encoded with the codec it declares.
        return xml(encoding).encode(encoding)
    supported_encodings = [
        'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
        'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
        'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
        'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
        'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
        'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006',
        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
        'mac-roman', 'mac-turkish',
        'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
        'iso2022-jp-3', 'iso2022-jp-ext',
        'koi8-r', 'koi8-u',
        'ptcp154',
    ]
    for encoding in supported_encodings:
        self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')

    # These encodings are expected to be rejected with ValueError.
    unsupported_ascii_compatible_encodings = [
        'big5', 'big5hkscs',
        'cp932', 'cp949', 'cp950',
        'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
        'gb2312', 'gbk', 'gb18030',
        'iso2022-kr', 'johab', 'hz',
        'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
        'utf-7',
    ]
    for encoding in unsupported_ascii_compatible_encodings:
        self.assertRaises(ValueError, ET.XML, bxml(encoding))

    # These encodings are expected to fail with ParseError instead.
    unsupported_ascii_incompatible_encodings = [
        'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
        'utf_32', 'utf_32_be', 'utf_32_le',
    ]
    for encoding in unsupported_ascii_incompatible_encodings:
        self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))

    # Declared encoding names 'undefined' and 'xxx' in an ASCII-encoded
    # document raise ValueError and LookupError respectively.
    self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
    self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
759
def test_methods(self):
    # Test serialization methods: the default, None and "xml" all give the
    # XML form; "html" and "text" give their own output.

    root = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    root.tail = "\n"

    as_xml = '<html><link /><script>1 &lt; 2</script></html>\n'
    self.assertEqual(serialize(root), as_xml)
    for method in (None, "xml"):
        self.assertEqual(serialize(root, method=method), as_xml)

    as_html = '<html><link><script>1 < 2</script></html>\n'
    self.assertEqual(serialize(root, method="html"), as_html)
    self.assertEqual(serialize(root, method="text"), '1 < 2\n')
774
def test_issue18347(self):
    # Mixed-case tag names must keep their case in both the default and the
    # "html" serialization.
    markup = '<html><CamelCase>text</CamelCase></html>'
    root = ET.XML(markup)
    self.assertEqual(serialize(root), markup)
    self.assertEqual(serialize(root, method="html"), markup)
781
def test_entity(self):
    # Test entity handling: character references, undefined entities, and
    # entities supplied through the parser's `entity` mapping.

    # 1) good entities

    # A hexadecimal character reference is written back as a decimal one.
    e = ET.XML("<document title='&#x8230;'>test</document>")
    self.assertEqual(serialize(e, encoding="us-ascii"),
            '<document title="&#33328;">test</document>')
    self.serialize_check(e, '<document title="&#33328;">test</document>')

    # 2) bad entities

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML("<document>&entity;</document>")
    self.assertEqual(str(cm.exception),
            'undefined entity: line 1, column 10')

    # With ENTITY_XML the error message also names the entity.
    with self.assertRaises(ET.ParseError) as cm:
        ET.XML(ENTITY_XML)
    self.assertEqual(str(cm.exception),
            'undefined entity &entity;: line 5, column 10')

    # 3) custom entity

    # Registering a replacement in parser.entity makes the same document
    # parse, with the reference expanded to the registered text.
    parser = ET.XMLParser()
    parser.entity["entity"] = "text"
    parser.feed(ENTITY_XML)
    root = parser.close()
    self.serialize_check(root, '<document>text</document>')
811
def test_namespace(self):
    # Test namespace issues: which prefixes are used when elements in
    # various namespaces are serialized.

    # 1) xml namespace

    elem = ET.XML("<tag xml:lang='en' />")
    self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1

    # 2) other "well-known" namespaces

    # For the RDF and XHTML URIs the expected output uses the rdf: and
    # html: prefixes.
    elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    self.serialize_check(elem,
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1

    elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    self.serialize_check(elem,
        '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2

    # For this SOAP URI the source prefix is not kept; a generated ns0
    # prefix is expected instead.
    elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    self.serialize_check(elem,
        '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3

    # 3) unknown namespaces
    # A default namespace in the input is written with a generated prefix.
    elem = ET.XML(SAMPLE_XML_NS)
    self.serialize_check(elem,
        '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
        ' <ns0:tag>text</ns0:tag>\n'
        ' <ns0:tag />\n'
        ' <ns0:section>\n'
        ' <ns0:tag>subtext</ns0:tag>\n'
        ' </ns0:section>\n'
        '</ns0:body>')
844
def test_qname(self):
    # Test QName handling for tags, attribute keys and attribute values.

    bare_root = '<ns0:tag xmlns:ns0="uri" />'
    keyed_root = '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    # 1) decorated tags

    self.serialize_check(ET.Element("{uri}tag"), bare_root)  # 1.1
    self.serialize_check(ET.Element(ET.QName("{uri}tag")), bare_root)  # 1.2
    self.serialize_check(ET.Element(ET.QName("uri", "tag")), bare_root)  # 1.3
    root = ET.Element(ET.QName("uri", "tag"))
    ET.SubElement(root, ET.QName("uri", "tag1"))
    ET.SubElement(root, ET.QName("uri", "tag2"))
    self.serialize_check(
        root,
        '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>')  # 1.4

    # 2) decorated attributes

    root.clear()
    root.attrib["{uri}key"] = "value"
    self.serialize_check(root, keyed_root)  # 2.1

    root.clear()
    root.attrib[ET.QName("{uri}key")] = "value"
    self.serialize_check(root, keyed_root)  # 2.2

    # 3) decorated values are not converted by default, but the
    # QName wrapper can be used for values

    root.clear()
    root.attrib["{uri}key"] = "{uri}value"
    self.serialize_check(
        root,
        '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />')  # 3.1

    root.clear()
    root.attrib["{uri}key"] = ET.QName("{uri}value")
    self.serialize_check(
        root,
        '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />')  # 3.2

    root.clear()
    child = ET.Element("tag")
    child.attrib["{uri1}key"] = ET.QName("{uri2}value")
    # the same child is appended twice, so it shows up twice in the output
    root.append(child)
    root.append(child)
    self.serialize_check(
        root,
        '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
        '<tag ns1:key="ns2:value" />'
        '<tag ns1:key="ns2:value" />'
        '</ns0:tag>')  # 3.3

    # 4) Direct QName tests: string form and comparisons

    self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
    self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
    qname = ET.QName('ns', 'tag')
    same_qname = ET.QName('ns', 'tag')
    self.assertEqual(qname, same_qname)
    other_qname = ET.QName('ns', 'other-tag')
    self.assertNotEqual(qname, other_qname)
    self.assertNotEqual(qname, 'ns:tag')
    self.assertEqual(qname, '{ns}tag')
909
def test_doctype_public(self):
    # Test PUBLIC doctype: the document only has to parse without raising,
    # nothing is asserted about the resulting element.

    doctype = ('<!DOCTYPE html PUBLIC'
               ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">')
    ET.XML(doctype + '<html>text</html>')
917
def test_xpath_tokenizer(self):
    # Test the XPath tokenizer.  Every (op, tag) pair it yields is reduced
    # to the operator when that is non-empty and to the tag otherwise.
    from xml.etree import ElementPath

    def tokenize(path):
        return [op or tag for op, tag in ElementPath.xpath_tokenizer(path)]

    cases = [
        # tests from the xml specification
        ("*", ['*']),
        ("text()", ['text', '()']),
        ("@name", ['@', 'name']),
        ("@*", ['@', '*']),
        ("para[1]", ['para', '[', '1', ']']),
        ("para[last()]", ['para', '[', 'last', '()', ']']),
        ("*/para", ['*', '/', 'para']),
        ("/doc/chapter[5]/section[2]",
         ['/', 'doc', '/', 'chapter', '[', '5', ']',
          '/', 'section', '[', '2', ']']),
        ("chapter//para", ['chapter', '//', 'para']),
        ("//para", ['//', 'para']),
        ("//olist/item", ['//', 'olist', '/', 'item']),
        (".", ['.']),
        (".//para", ['.', '//', 'para']),
        ("..", ['..']),
        ("../@lang", ['..', '/', '@', 'lang']),
        ("chapter[title]", ['chapter', '[', 'title', ']']),
        # the blanks around "and" are expected as empty tokens
        ("employee[@secretary and @assistant]",
         ['employee', '[', '@', 'secretary', '', 'and', '',
          '@', 'assistant', ']']),

        # additional tests
        ("{http://spam}egg", ['{http://spam}egg']),
        ("./spam.egg", ['.', '/', 'spam.egg']),
        (".//{http://spam}egg", ['.', '//', '{http://spam}egg']),
    ]
    for path, expected in cases:
        self.assertEqual(tokenize(path), expected)
952
def test_processinginstruction(self):
    # Test ProcessingInstruction directly, through both
    # ET.ProcessingInstruction and ET.PI.

    for factory in (ET.ProcessingInstruction, ET.PI):
        instruction = factory('test', 'instruction')
        self.assertEqual(ET.tostring(instruction), '<?test instruction?>')

    # Issue #2746: markup characters in the instruction text are expected
    # to be written out as they are.

    plain_output = ET.tostring(ET.PI('test', '<testing&>'))
    self.assertEqual(plain_output, '<?test <testing&>?>')
    latin1_output = ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    self.assertEqual(latin1_output,
                     "<?xml version='1.0' encoding='latin1'?>\n"
                     "<?test <testing&>\xe3?>")
968
def test_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    #
    # For every listed element, in upper and lower case, both the
    # self-closing and the open/close spelling must serialize with
    # method='html' to a lone start tag.
    empty_elements = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
                      'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
    for upper_name in empty_elements:
        for name in (upper_name, upper_name.lower()):
            expected = '<%s>' % name
            sources = ('<%s />' % name, '<%s></%s>' % (name, name))
            for source in sources:
                serialized = serialize(ET.XML(source), method='html')
                self.assertEqual(serialized, expected)
981
Fred Drake9297e162006-07-29 18:19:19 +0000982
Armin Rigo9ed73062005-12-14 18:10:45 +0000983#
984# xinclude tests (samples from appendix C of the xinclude specification)
985
# In-memory resources for the xinclude tests, keyed by the href used to
# request them.
XINCLUDE = {
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml"/>
</document>
""",

    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
 <p>The opinions represented herein represent those of the individual
 and should not be interpreted as official policy endorsed by this
 organization.</p>
</disclaimer>
""",

    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been accessed
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "count.txt": "324387",

    # C2 variant where the text include follows a sibling element
    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been <em>accessed</em>
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source of the "data.xml" resource:</p>
 <example><xi:include href="data.xml" parse="text"/></example>
</document>
""",

    "data.xml": """\
<?xml version='1.0'?>
<data>
 <item><![CDATA[Brooks & Shields]]></item>
</data>
""",

    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:include href="example.txt" parse="text">
 <xi:fallback>
 <xi:include href="fallback-example.txt" parse="text">
 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
 </xi:include>
 </xi:fallback>
 </xi:include>
</div>
""",

    # The href is the escaped path of SIMPLE_XMLFILE.
    "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>Example.</p>
 <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True)),
}
Armin Rigo9ed73062005-12-14 18:10:45 +00001058
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001059#
1060# badly formatted xi:include tags
1061
# Documents whose xinclude markup is malformed, keyed by file name.
XINCLUDE_BAD = {
    # xi:include with an unsupported parse attribute
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",

    # xi:fallback that is not the child of an xi:include
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:fallback></xi:fallback>
</div>
""",
}
1078
class XIncludeTest(unittest.TestCase):
    # Tests for ElementInclude, fed from the in-memory XINCLUDE and
    # XINCLUDE_BAD tables.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader backed by the XINCLUDE table.  Unknown hrefs raise IOError;
        # resources requested as "xml" are parsed, everything else is
        # returned as the stored string.
        if href not in XINCLUDE:
            raise IOError("resource not found")
        resource = XINCLUDE[href]
        return ET.XML(resource) if parse == "xml" else resource

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def test_xinclude_default(self):
        # No loader is passed to include() here.
        from xml.etree import ElementInclude
        document = self.xinclude_loader('default.xml')
        ElementInclude.include(document)
        self.assertEqual(serialize(document),
                         '<document>\n'
                         ' <p>Example.</p>\n'
                         ' <root>\n'
                         ' <element key="value">text</element>\n'
                         ' <element>text</element>tail\n'
                         ' <empty-element />\n'
                         '</root>\n'
                         '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        def expanded(name):
            # Load *name* from XINCLUDE and resolve its includes in place.
            loaded = self.xinclude_loader(name)
            ElementInclude.include(loaded, self.xinclude_loader)
            return loaded

        # Basic inclusion example (XInclude C.1)
        self.assertEqual(
            serialize(expanded("C1.xml")),
            '<document>\n'
            ' <p>120 Mz is adequate for an average home user.</p>\n'
            ' <disclaimer>\n'
            ' <p>The opinions represented herein represent those of the individual\n'
            ' and should not be interpreted as official policy endorsed by this\n'
            ' organization.</p>\n'
            '</disclaimer>\n'
            '</document>')  # C1

        # Textual inclusion example (XInclude C.2)
        self.assertEqual(
            serialize(expanded("C2.xml")),
            '<document>\n'
            ' <p>This document has been accessed\n'
            ' 324387 times.</p>\n'
            '</document>')  # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        self.assertEqual(
            serialize(expanded("C2b.xml")),
            '<document>\n'
            ' <p>This document has been <em>accessed</em>\n'
            ' 324387 times.</p>\n'
            '</document>')  # C2b

        # Textual inclusion of XML example (XInclude C.3)
        self.assertEqual(
            serialize(expanded("C3.xml")),
            '<document>\n'
            ' <p>The following is the source of the "data.xml" resource:</p>\n'
            " <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            ' &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>')  # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented, so the missing
        # resource surfaces as the loader's IOError and the document is
        # expected to serialize with its include markup still in place.
        fallback_doc = self.xinclude_loader("C5.xml")
        with self.assertRaises(IOError) as caught:
            ElementInclude.include(fallback_doc, self.xinclude_loader)
        self.assertEqual(str(caught.exception), 'resource not found')
        self.assertEqual(
            serialize(fallback_doc),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            ' <ns0:include href="example.txt" parse="text">\n'
            ' <ns0:fallback>\n'
            ' <ns0:include href="fallback-example.txt" parse="text">\n'
            ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            ' </ns0:include>\n'
            ' </ns0:fallback>\n'
            ' </ns0:include>\n'
            '</div>')  # C5

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        failures = [
            # Test failure to locate included XML file.
            (XINCLUDE["C1.xml"],
             "cannot load 'disclaimer.xml' as 'xml'"),
            # Test failure to locate included text file.
            (XINCLUDE["C2.xml"],
             "cannot load 'count.txt' as 'text'"),
            # Test bad parse type.
            (XINCLUDE_BAD["B1.xml"],
             "unknown parse type in xi:include tag ('BAD_TYPE')"),
            # Test xi:fallback outside xi:include.
            (XINCLUDE_BAD["B2.xml"],
             "xi:fallback tag must be child of xi:include "
             "('{http://www.w3.org/2001/XInclude}fallback')"),
        ]
        for source, message in failures:
            document = ET.XML(source)
            with self.assertRaises(ElementInclude.FatalIncludeError) as caught:
                ElementInclude.include(document, loader=self.none_loader)
            self.assertEqual(str(caught.exception), message)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001202
1203# --------------------------------------------------------------------
1204# reported bugs
1205
class BugsTest(unittest.TestCase):
    # Regression tests for reported bugs.

    def test_bug_xmltoolkit21(self):
        # marshaller gives obscure errors for non-string values

        def bad_tag():
            return ET.Element(123)

        def bad_text():
            elem = ET.Element("elem")
            elem.text = 123
            return elem

        def bad_tail():
            elem = ET.Element("elem")
            elem.tail = 123
            return elem

        def bad_attribute_key():
            elem = ET.Element("elem")
            elem.set(123, "123")
            return elem

        def bad_attribute_value():
            elem = ET.Element("elem")
            elem.set("123", 123)
            return elem

        builders = (bad_tag, bad_text, bad_tail,
                    bad_attribute_key, bad_attribute_value)
        for build in builders:
            # build outside the context manager: only serialization may fail
            broken = build()
            with self.assertRaises(TypeError) as caught:
                serialize(broken)
            self.assertEqual(str(caught.exception),
                             'cannot serialize 123 (type int)')

    def test_bug_xmltoolkit25(self):
        # typo in ElementTree.findtext

        document = ET.ElementTree(ET.XML(SAMPLE_XML))
        self.assertEqual(document.findtext("tag"), 'text')
        self.assertEqual(document.findtext("section/tag"), 'subtext')

    def test_bug_xmltoolkit28(self):
        # .//tag causes exceptions

        root = ET.XML("<doc><table><tbody/></table></doc>")
        missing = root.findall(".//thead")
        self.assertEqual(summarize_list(missing), [])
        present = root.findall(".//tbody")
        self.assertEqual(summarize_list(present), ['tbody'])

    def test_bug_xmltoolkitX1(self):
        # dump() doesn't flush the output buffer

        root = ET.XML("<doc><table><tbody/></table></doc>")
        with support.captured_stdout() as captured:
            ET.dump(root)
            self.assertEqual(captured.getvalue(),
                             '<doc><table><tbody /></table></doc>\n')

    def test_bug_xmltoolkit39(self):
        # non-ascii element and attribute names don't work

        declaration = b"<?xml version='1.0' encoding='iso-8859-1'?>"

        parsed = ET.XML(declaration + b"<t\xe4g />")
        self.assertEqual(ET.tostring(parsed, "utf-8"), b'<t\xc3\xa4g />')

        parsed = ET.XML(declaration + b"<tag \xe4ttr='v&#228;lue' />")
        self.assertEqual(parsed.attrib, {u'\xe4ttr': u'v\xe4lue'})
        self.assertEqual(ET.tostring(parsed, "utf-8"),
                         b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

        parsed = ET.XML(declaration + b'<t\xe4g>text</t\xe4g>')
        self.assertEqual(ET.tostring(parsed, "utf-8"),
                         b'<t\xc3\xa4g>text</t\xc3\xa4g>')

        built = ET.Element(u"t\u00e4g")
        self.assertEqual(ET.tostring(built, "utf-8"), b'<t\xc3\xa4g />')

        built = ET.Element("tag")
        built.set(u"\u00e4ttr", u"v\u00e4lue")
        self.assertEqual(ET.tostring(built, "utf-8"),
                         b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    def test_bug_xmltoolkit54(self):
        # problems handling internally defined entities

        source = ("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
                  '<doc>&ldots;</doc>')
        root = ET.XML(source)
        self.assertEqual(serialize(root), '<doc>&#33328;</doc>')

    def test_bug_xmltoolkit55(self):
        # make sure we're reporting the first error, not the last

        source = ("<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                  '<doc>&ldots;&ndots;&rdots;</doc>')
        with self.assertRaises(ET.ParseError) as caught:
            ET.XML(source)
        self.assertEqual(str(caught.exception),
                         'undefined entity &ldots;: line 1, column 36')

    def test_bug_xmltoolkit60(self):
        # Handle crash in stream source.

        class ExceptionFile:
            # file-like object whose read() always fails
            def read(self, x):
                raise IOError

        broken_source = ExceptionFile()
        self.assertRaises(IOError, ET.parse, broken_source)

    def test_bug_xmltoolkit62(self):
        # Don't crash when using custom entities.

        custom_entities = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
        xml_parser = ET.XMLTreeBuilder()
        xml_parser.entity.update(custom_entities)
        xml_parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
        root = xml_parser.close()
        paragraph = root.find('.//paragraph')
        self.assertEqual(
            paragraph.text,
            u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')

    def test_bug_xmltoolkit63(self):
        # Check reference leak: the reference count of None must be the same
        # before and after many TreeBuilder runs.
        def xmltoolkit63():
            builder = ET.TreeBuilder()
            builder.start("tag", {})
            builder.data("text")
            builder.end("tag")

        # one run before taking the baseline
        xmltoolkit63()
        baseline = sys.getrefcount(None)
        for _ in range(1000):
            xmltoolkit63()
        self.assertEqual(sys.getrefcount(None), baseline)

    def test_bug_200708_newline(self):
        # Preserve newlines in attributes.

        escaped = b'<SomeTag text="def _f():&#10; return 3&#10;" />'
        element = ET.Element('SomeTag', text="def _f():\n return 3\n")
        self.assertEqual(ET.tostring(element), escaped)
        self.assertEqual(ET.XML(ET.tostring(element)).get("text"),
                         'def _f():\n return 3\n')
        self.assertEqual(ET.tostring(ET.XML(ET.tostring(element))), escaped)

    def test_bug_200708_close(self):
        # Test default builder.
        default_parser = ET.XMLParser()  # default
        default_parser.feed("<element>some text</element>")
        self.assertEqual(default_parser.close().tag, 'element')

        # Test custom builder.
        class EchoTarget:
            def close(self):
                return ET.Element("element")  # simulate root
        custom_parser = ET.XMLParser(EchoTarget())
        custom_parser.feed("<element>some text</element>")
        self.assertEqual(custom_parser.close().tag, 'element')

    def test_bug_200709_default_namespace(self):
        root = ET.Element("{default}elem")
        ET.SubElement(root, "{default}elem")
        self.assertEqual(serialize(root, default_namespace="default"),  # 1
                         '<elem xmlns="default"><elem /></elem>')

        root = ET.Element("{default}elem")
        ET.SubElement(root, "{default}elem")
        ET.SubElement(root, "{not-default}elem")
        self.assertEqual(serialize(root, default_namespace="default"),  # 2
                         '<elem xmlns="default" xmlns:ns1="not-default">'
                         '<elem />'
                         '<ns1:elem />'
                         '</elem>')

        root = ET.Element("{default}elem")
        ET.SubElement(root, "{default}elem")
        ET.SubElement(root, "elem")  # unprefixed name
        with self.assertRaises(ValueError) as caught:
            serialize(root, default_namespace="default")  # 3
        self.assertEqual(
            str(caught.exception),
            'cannot use non-qualified names with default_namespace option')

    def test_bug_200709_register_namespace(self):
        uri = "http://namespace.invalid/does/not/exist/"
        title_tag = "{http://namespace.invalid/does/not/exist/}title"

        # before registration a generated prefix is expected
        element = ET.Element(title_tag)
        self.assertEqual(
            ET.tostring(element),
            '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
        ET.register_namespace("foo", uri)
        element = ET.Element(title_tag)
        self.assertEqual(
            ET.tostring(element),
            '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

        # And the Dublin Core namespace is in the default list:

        element = ET.Element("{http://purl.org/dc/elements/1.1/}title")
        self.assertEqual(
            ET.tostring(element),
            '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')

    def test_bug_200709_element_comment(self):
        # Not sure if this can be fixed, really (since the serializer needs
        # ET.Comment, not cET.comment).

        parent = ET.Element('a')
        parent.append(ET.Comment('foo'))
        self.assertEqual(parent[0].tag, ET.Comment)

        parent = ET.Element('a')
        parent.append(ET.PI('foo'))
        self.assertEqual(parent[0].tag, ET.PI)

    def test_bug_200709_element_insert(self):
        parent = ET.Element('a')
        ET.SubElement(parent, 'b')
        ET.SubElement(parent, 'c')
        extra = ET.Element('d')
        parent.insert(0, extra)
        self.assertEqual(summarize_list(parent), ['d', 'b', 'c'])
        # a negative index is expected to count from the end
        parent.insert(-1, extra)
        self.assertEqual(summarize_list(parent), ['d', 'b', 'd', 'c'])

    def test_bug_200709_iter_comment(self):
        root = ET.Element('a')
        child = ET.SubElement(root, 'b')
        child.append(ET.Comment("TEST-b"))
        found = root.iter(ET.Comment)
        self.assertEqual(summarize_list(found), [ET.Comment])

    # --------------------------------------------------------------------
    # reported on bugs.python.org

    def test_bug_1534630(self):
        # data() is called before any element has been started
        builder = ET.TreeBuilder()
        builder.data("data")
        builder.start("tag", {})
        builder.end("tag")
        root = builder.close()
        self.assertEqual(serialize(root), '<tag />')

    def test_issue6233(self):
        # The same non-ASCII character, read from UTF-8 and from ISO-8859-1
        # input, must be written as a character reference in ASCII output.
        ascii_output = (b"<?xml version='1.0' encoding='ascii'?>\n"
                        b'<body>t&#227;g</body>')
        from_utf8 = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
                           b'<body>t\xc3\xa3g</body>')
        self.assertEqual(ET.tostring(from_utf8, 'ascii'), ascii_output)
        from_latin1 = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                             b'<body>t\xe3g</body>')
        self.assertEqual(ET.tostring(from_latin1, 'ascii'), ascii_output)

    def test_issue3151(self):
        # namespace URI that itself contains braces
        element = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
        self.assertEqual(element.tag, '{${stuff}}localname')
        ET.ElementTree(element)
        self.assertEqual(ET.tostring(element),
                         b'<ns0:localname xmlns:ns0="${stuff}" />')

    def test_issue6565(self):
        # slice assignment replaces all children
        target = ET.XML("<body><tag/></body>")
        self.assertEqual(summarize_list(target), ['tag'])
        replacement = ET.XML(SAMPLE_XML)
        target[:] = replacement[:]
        self.assertEqual(summarize_list(target), ['tag', 'tag', 'section'])

    def test_issue10777(self):
        # Registering a namespace twice caused a "dictionary changed size during
        # iteration" bug.

        for _ in range(2):
            ET.register_namespace('test10777', 'http://myuri/')
1474
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001475
1476# --------------------------------------------------------------------
1477
1478
class BasicElementTest(unittest.TestCase):
    # Garbage-collection, weak-reference and keyword-argument checks on
    # Element.  All tests carry the python_only decorator (defined elsewhere
    # in this file; presumably it limits them to the pure-Python
    # implementation -- confirm against its definition).

    @python_only
    def test_cyclic_gc(self):
        # Each scenario builds a reference cycle, keeps only a weak reference
        # to one member, drops all strong references and checks that a
        # collection run clears the weak reference.
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e1.append(e2)
        # e2 must hold e3 (not itself) to close the cycle; otherwise e1 is
        # not part of any cycle and is released without the collector.
        e2.append(e3)
        e3.append(e1)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    @python_only
    def test_weakref(self):
        # The weakref callback must have run and the reference must be dead
        # right after the last strong reference is deleted.
        flag = []
        def wref_cb(w):
            flag.append(True)
        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        self.assertEqual(flag, [True])
        self.assertEqual(wref(), None)

    @python_only
    def test_get_keyword_args(self):
        # Element.get() must accept its default as a keyword argument.
        e1 = ET.Element('foo' , x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001533
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001534
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001535class BadElementTest(unittest.TestCase):
1536 def test_extend_mutable_list(self):
1537 class X(object):
1538 @property
1539 def __class__(self):
1540 L[:] = [ET.Element('baz')]
1541 return ET.Element
1542 L = [X()]
1543 e = ET.Element('foo')
1544 try:
1545 e.extend(L)
1546 except TypeError:
1547 pass
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001548
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001549 if ET is pyET:
1550 class Y(X, ET.Element):
1551 pass
1552 L = [Y('x')]
1553 e = ET.Element('foo')
1554 e.extend(L)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001555
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001556 def test_extend_mutable_list2(self):
1557 class X(object):
1558 @property
1559 def __class__(self):
1560 del L[:]
1561 return ET.Element
1562 L = [X(), ET.Element('baz')]
1563 e = ET.Element('foo')
1564 try:
1565 e.extend(L)
1566 except TypeError:
1567 pass
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001568
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001569 if ET is pyET:
1570 class Y(X, ET.Element):
1571 pass
1572 L = [Y('bar'), ET.Element('baz')]
1573 e = ET.Element('foo')
1574 e.extend(L)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001575
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001576 @python_only
1577 def test_remove_with_mutating(self):
1578 class X(ET.Element):
1579 def __eq__(self, o):
1580 del e[:]
1581 return False
Serhiy Storchaka65c5b092017-04-12 16:00:14 +03001582 __hash__ = object.__hash__
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001583 e = ET.Element('foo')
1584 e.extend([X('bar')])
1585 self.assertRaises(ValueError, e.remove, ET.Element('baz'))
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001586
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001587 e = ET.Element('foo')
1588 e.extend([ET.Element('bar')])
1589 self.assertRaises(ValueError, e.remove, X('baz'))
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001590
Serhiy Storchaka68903b62017-04-02 16:55:43 +03001591 def test_recursive_repr(self):
1592 # Issue #25455
1593 e = ET.Element('foo')
1594 with swap_attr(e, 'tag', e):
1595 with self.assertRaises(RuntimeError):
1596 repr(e) # Should not crash
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001597
def test_element_get_text(self):
    # Issue #27863
    # A str subclass whose finalizer reads the text of the element under
    # construction. NameError is ignored, e.g. when the finalizer runs
    # before ``built`` has been bound.
    class NosyStr(str):
        def __del__(self):
            try:
                built.text
            except NameError:
                pass

    builder = ET.TreeBuilder()
    builder.start('tag', {})
    # Each chunk is passed as a temporary so that no extra reference to
    # the NosyStr instance is kept by this test.
    builder.data('ABCD')
    builder.data(NosyStr('EFGH'))
    builder.data('IJKL')
    builder.end('tag')

    built = builder.close()
    self.assertEqual(built.text, 'ABCDEFGHIJKL')
1616
def test_element_get_tail(self):
    # Issue #27863
    # A str subclass whose finalizer reads the tail of the first child of
    # the element under construction. NameError is ignored, e.g. when the
    # finalizer runs before ``built`` has been bound.
    class NosyStr(str):
        def __del__(self):
            try:
                built[0].tail
            except NameError:
                pass

    builder = ET.TreeBuilder()
    builder.start('root', {})
    builder.start('tag', {})
    builder.end('tag')
    # Data arriving after the child has ended becomes that child's tail.
    # Each chunk is passed as a temporary so that no extra reference to
    # the NosyStr instance is kept by this test.
    builder.data('ABCD')
    builder.data(NosyStr('EFGH'))
    builder.data('IJKL')
    builder.end('root')

    built = builder.close()
    self.assertEqual(built[0].tail, 'ABCDEFGHIJKL')
1637
def test_element_iter(self):
    # Issue #27863
    root = ET.Element('tag')
    root.extend([None])  # a child that is not an Element

    walker = root.iter()
    # The root itself comes out first ...
    self.assertIs(next(walker), root)
    # ... and draining the rest must raise instead of crashing.
    with self.assertRaises((AttributeError, TypeError)):
        list(walker)
1646
def test_subscr(self):
    # Issue #27863
    # An index object that deletes every child of ``parent`` while it is
    # being converted to an integer.
    class ClearingIndex:
        def __index__(self):
            del parent[:]
            return 1

    parent = ET.Element('elem')

    parent.append(ET.Element('child'))
    parent[:ClearingIndex()]  # used as slice stop; shouldn't crash

    parent.append(ET.Element('child'))
    parent[0:10:ClearingIndex()]  # used as slice step; shouldn't crash
1660
def test_ass_subscr(self):
    # Issue #27863
    # An index object that replaces all children of ``parent`` with an
    # empty list while it is being converted to an integer.
    class ClearingIndex:
        def __index__(self):
            parent[:] = []
            return 1

    parent = ET.Element('elem')
    remaining = 10
    while remaining:
        parent.insert(0, ET.Element('child'))
        remaining -= 1

    parent[0:10:ClearingIndex()] = []  # shouldn't crash
1673
1674
class MutatingElementPath(str):
    # A path string that remembers an element (any object supporting
    # ``del obj[:]``) and empties it whenever the path is compared for
    # equality; the comparison itself always succeeds.

    # Defining __eq__ would otherwise drop hashing on Python 3; keep str's.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = str.__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001684
class BadElementPath(str):
    # A path string whose equality comparison always blows up with
    # ZeroDivisionError.

    # Defining __eq__ would otherwise drop hashing on Python 3; keep str's.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # The division raises before any value can be returned.
        return 1.0 / 0.0
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001689
class BadElementPathTest(unittest.TestCase):
    # find(), findtext() and findall() called with path objects whose
    # __eq__ either mutates the element being searched or raises.

    def setUp(self):
        super(BadElementPathTest, self).setUp()
        from xml.etree import ElementPath
        # Run every test against an empty path cache; the saved cache is
        # put back in tearDown().
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super(BadElementPathTest, self).tearDown()

    @staticmethod
    def _foo_with_bar():
        # Build <foo><bar/></foo>.
        root = ET.Element('foo')
        root.extend([ET.Element('bar')])
        return root

    def test_find_with_mutating(self):
        root = self._foo_with_bar()
        root.find(MutatingElementPath(root, 'x'))

    def test_find_with_error(self):
        root = self._foo_with_bar()
        try:
            root.find(BadElementPath('x'))
        except ZeroDivisionError:
            # Propagating the comparison error is acceptable.
            pass

    def test_findtext_with_mutating(self):
        root = self._foo_with_bar()
        root.findtext(MutatingElementPath(root, 'x'))

    def test_findtext_with_error(self):
        root = self._foo_with_bar()
        try:
            root.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            # Propagating the comparison error is acceptable.
            pass

    def test_findall_with_mutating(self):
        root = self._foo_with_bar()
        root.findall(MutatingElementPath(root, 'x'))

    def test_findall_with_error(self):
        root = self._foo_with_bar()
        try:
            root.findall(BadElementPath('x'))
        except ZeroDivisionError:
            # Propagating the comparison error is acceptable.
            pass
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001740
Florent Xicluna3e8c1892010-03-11 14:36:19 +00001741
class ElementTreeTypeTest(unittest.TestCase):
    # Checks on which public ElementTree names are real types and on
    # subclassing Element (pure-Python implementation only).

    def test_istype(self):
        type_names = ['ParseError', 'QName', 'ElementTree']
        if ET is pyET:
            # These three are only required to be types in the Python
            # implementation.
            type_names += ['Element', 'TreeBuilder', 'XMLParser']
        for attr in type_names:
            self.assertIsInstance(getattr(ET, attr), type)

    @python_only
    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        elem = MyElement('foo')
        for cls in (ET.Element, MyElement):
            self.assertIsInstance(elem, cls)
        self.assertEqual(elem.tag, 'foo')

        # attribute assignment must work on the subclass (issue 14849)
        elem.text = "joe"
        self.assertEqual(elem.text, "joe")

    @python_only
    def test_Element_subclass_constructor(self):
        class MyElement(ET.Element):
            # Same signature as the base constructor; decorates the tag.
            def __init__(self, tag, attrib={}, **extra):
                decorated = tag + '__'
                super(MyElement, self).__init__(decorated, attrib, **extra)

        elem = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(elem.tag, 'foo__')
        # Positional attrib dict and keyword extras end up merged.
        merged = sorted(elem.items())
        self.assertEqual(merged, [('a', 1), ('b', 2), ('c', 3), ('d', 4)])

    @python_only
    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        elem = MyElement('joe')
        self.assertEqual(elem.newmethod(), 'joe')
1785
1786
class ElementFindTest(unittest.TestCase):
    # find(), findtext() and findall() on Element and ElementTree.

    @python_only
    def test_simplefind(self):
        # Touch the attribute first so a missing ElementPath shows up as an
        # AttributeError here rather than inside swap_attr().
        ET.ElementPath
        with swap_attr(ET, 'ElementPath', ET._SimpleElementPath()):
            root = ET.XML(SAMPLE_XML)
            self.assertEqual(root.find('tag').tag, 'tag')
            self.assertEqual(ET.ElementTree(root).find('tag').tag, 'tag')
            self.assertEqual(root.findtext('tag'), 'text')
            self.assertIsNone(root.findtext('tog'))
            self.assertEqual(root.findtext('tog', 'default'), 'default')
            self.assertEqual(ET.ElementTree(root).findtext('tag'), 'text')
            self.assertEqual(summarize_list(root.findall('tag')),
                             ['tag', 'tag'])
            self.assertEqual(summarize_list(root.findall('.//tag')),
                             ['tag', 'tag', 'tag'])

            # Path syntax doesn't work in this case.
            nested = 'section/tag'
            self.assertIsNone(root.find(nested))
            self.assertIsNone(root.findtext(nested))
            self.assertEqual(summarize_list(root.findall(nested)), [])

    def test_find_simple(self):
        root = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(root.find(path).tag, 'tag')

        root[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(root.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(root.findtext('./tag'), 'text')
        self.assertEqual(root.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text, so the default is
        # never used
        self.assertEqual(root.findtext('section/nexttag'), '')
        self.assertEqual(root.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist, so the default (None unless given) comes back
        self.assertIsNone(root.findtext('tog'))
        self.assertEqual(root.findtext('tog', 'default'), 'default')

        # Issue #16922
        empty_holder = ET.XML('<tag><empty /></tag>')
        self.assertEqual(empty_holder.findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        root = ET.XML(LINEAR_XML)

        # Numeric indexing and last(): path -> expected 'class' attribute
        expectations = [
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        ]
        for path, css_class in expectations:
            self.assertEqual(root.find(path).attrib['class'], css_class)

    def test_findall(self):
        root = ET.XML(SAMPLE_XML)
        root[2] = ET.XML(SAMPLE_SECTION)

        # path -> tags of the elements findall() is expected to return
        expectations = [
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', ['tag', 'nexttag', 'nextsection']),
            ('section//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/*', ['tag', 'nexttag', 'nextsection']),
            ('*//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
            # predicates
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            # parent steps
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        ]
        for path, tags in expectations:
            self.assertEqual(summarize_list(root.findall(path)), tags)

        # A trailing '//' behaves like '//*'.
        self.assertEqual(root.findall('section//'),
                         root.findall('section//*'))

    def test_test_find_with_ns(self):
        root = ET.XML(SAMPLE_XML_NS)
        qualified = '{http://effbot.org/ns}tag'

        # The unqualified name matches nothing.
        self.assertEqual(summarize_list(root.findall('tag')), [])
        self.assertEqual(
            summarize_list(root.findall("{http://effbot.org/ns}tag")),
            [qualified] * 2)
        self.assertEqual(
            summarize_list(root.findall(".//{http://effbot.org/ns}tag")),
            [qualified] * 3)

    def test_bad_find(self):
        root = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegexp(SyntaxError,
                                     'cannot use absolute path on element'):
            root.findall('/tag')

    def test_find_through_ElementTree(self):
        root = ET.XML(SAMPLE_XML)

        def tree():
            # A new ElementTree wrapper around the current root per query.
            return ET.ElementTree(root)

        self.assertEqual(tree().find('tag').tag, 'tag')
        self.assertEqual(tree().find('./tag').tag, 'tag')
        self.assertEqual(tree().find('/tag').tag, 'tag')
        root[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(tree().find('section/tag').tag, 'tag')
        self.assertIsNone(tree().find('tog'))
        self.assertIsNone(tree().find('tog/foo'))

        self.assertEqual(tree().findtext('tag'), 'text')
        self.assertIsNone(tree().findtext('tog/foo'))
        self.assertEqual(tree().findtext('tog/foo', 'default'), 'default')
        self.assertEqual(tree().findtext('./tag'), 'text')
        self.assertEqual(tree().findtext('/tag'), 'text')
        self.assertEqual(tree().findtext('section/tag'), 'subtext')

        self.assertEqual(summarize_list(tree().findall('./tag')),
                         ['tag'] * 2)
        # this produces a warning
        self.assertEqual(summarize_list(tree().findall('/tag')),
                         ['tag'] * 2)
1930
1931
class ElementIterTest(unittest.TestCase):
    # Element.iter(), itertext(), getiterator() and ET.iterparse().

    # Document shared by the tag-filtering tests below.
    _HOUSES_XML = '''
        <document>
            <house>
                <room>bedroom1</room>
                <room>bedroom2</room>
            </house>
            <shed>nothing here
            </shed>
            <house>
                <room>bedroom8</room>
            </house>
        </document>'''

    # Every tag of _HOUSES_XML in document order.
    _HOUSES_ALL_TAGS = ['document', 'house', 'room', 'room',
                        'shed', 'house', 'room']

    def _ilist(self, elem, tag=None):
        # Summarize what elem.iter(tag) yields.
        return summarize_list(elem.iter(tag))

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = doc.find('body')
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(body.itertext()), 'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse() must return an iterator. See bug 6472.
        sourcefile = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(sourcefile))
        self.assertEqual(first_event[0], 'end')

        if ET is pyET:
            # The same with an explicit parser (issue #9708)
            sourcefile = serialize(doc, to_string=False)
            parser = ET.XMLParser(target=ET.TreeBuilder())
            first_event = next(ET.iterparse(sourcefile, parser=parser))
            self.assertEqual(first_event[0], 'end')

        # A tree without a root cannot be iterated.
        tree = ET.ElementTree(None)
        self.assertRaises(AttributeError, tree.iter)

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        first = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(first, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML(self._HOUSES_XML)

        for tag, count in (('room', 3), ('house', 2)):
            self.assertEqual(self._ilist(doc, tag), [tag] * count)

        if ET is pyET:
            # iter() also accepts 'tag' as a keyword argument
            self.assertEqual(
                summarize_list(doc.iter(tag='room')),
                ['room'] * 3)

        # omitting the tag, tag=None and tag='*' all return every tag
        everything = self._HOUSES_ALL_TAGS
        self.assertEqual(summarize_list(doc.iter()), everything)
        self.assertEqual(self._ilist(doc), everything)
        self.assertEqual(self._ilist(doc, '*'), everything)

    def test_getiterator(self):
        doc = ET.XML(self._HOUSES_XML)

        self.assertEqual(summarize_list(doc.getiterator('room')),
                         ['room'] * 3)
        self.assertEqual(summarize_list(doc.getiterator('house')),
                         ['house'] * 2)

        if ET is pyET:
            # getiterator() also accepts 'tag' as a keyword argument
            self.assertEqual(
                summarize_list(doc.getiterator(tag='room')),
                ['room'] * 3)

        # omitting the tag, tag=None and tag='*' all return every tag
        everything = self._HOUSES_ALL_TAGS
        self.assertEqual(summarize_list(doc.getiterator()), everything)
        self.assertEqual(summarize_list(doc.getiterator(None)), everything)
        self.assertEqual(summarize_list(doc.getiterator('*')), everything)

    def test_copy(self):
        # Element iterators cannot be copied.
        walker = ET.Element('a').iter()
        with self.assertRaises(TypeError):
            copy.copy(walker)

    def test_pickle(self):
        # Element iterators cannot be pickled under any protocol.
        walker = ET.Element('a').iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(walker, proto)
2060
2061
class TreeBuilderTest(unittest.TestCase):
    # XMLParser driving TreeBuilder and custom parser targets.

    sample1 = ('<!DOCTYPE html PUBLIC'
               ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
               '<html>text<div>subtext</div>tail</html>')

    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # Verify that *e* is the tree described by sample1: an <html> root
        # with a single <div> child.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})

        kids = list(e)
        self.assertEqual(len(kids), 1)

        div = kids[0]
        self.assertEqual(div.tag, 'div')
        self.assertEqual(div.text, 'subtext')
        self.assertEqual(div.tail, 'tail')
        self.assertEqual(div.attrib, {})

    def _parse_sample1_with(self, builder):
        # Feed sample1 through an XMLParser targeting *builder* and return
        # whatever the parser's close() returns.
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        return parser.close()

    def test_dummy_builder(self):
        class DummyBuilder:
            # Ignore all content; only close() returns something.
            data = start = end = lambda *a: None

            def close(self):
                return 42

        self.assertEqual(self._parse_sample1_with(DummyBuilder()), 42)

    @python_only
    def test_treebuilder_elementfactory_none(self):
        builder = ET.TreeBuilder(element_factory=None)
        self._check_sample1_element(self._parse_sample1_with(builder))

    @python_only
    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder()
        self.assertEqual(builder.foobar(10), 20)

        self._check_sample1_element(self._parse_sample1_with(builder))

    @python_only
    def test_element_factory(self):
        seen_tags = []

        def recording_factory(tag, attrib):
            seen_tags.append(tag)
            return ET.Element(tag, attrib)

        parser = ET.XMLParser(
            target=ET.TreeBuilder(element_factory=recording_factory))
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(seen_tags, ['toplevel'])

    @python_only
    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass

        builder = ET.TreeBuilder(element_factory=MyElement)
        root = self._parse_sample1_with(builder)
        self.assertIsInstance(root, MyElement)
        self._check_sample1_element(root)

    @python_only
    def test_doctype(self):
        class DoctypeParser:
            # Target that records the doctype and returns it from close().
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            data = start = end = lambda *a: None

            def close(self):
                return self._doctype

        expected = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                    'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        self.assertEqual(self._parse_sample1_with(DoctypeParser()), expected)
2163
2164
class XMLParserTest(unittest.TestCase):
    # XMLParser construction, subclassing and doctype handling.

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
               b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
               b'<html>text</html>')

    def _check_sample_element(self, e):
        # Verify that *e* is the tree described by sample1.
        line = e[0]
        self.assertEqual(e.tag, 'file')
        self.assertEqual(line.tag, 'line')
        self.assertEqual(line.text, '22')

    @python_only
    def test_constructor_args(self):
        # All positional. The first argument (html) is unsupported but
        # must still be accepted.
        positional = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
        positional.feed(self.sample1)
        self._check_sample_element(positional.close())

        # The same three arguments by keyword.
        by_keyword = ET.XMLParser(encoding='utf-8',
                                  html=[{}],
                                  target=ET.TreeBuilder())
        by_keyword.feed(self.sample1)
        self._check_sample_element(by_keyword.close())

    @python_only
    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        self._check_sample_element(parser.close())

    @python_only
    def test_doctype_warning(self):
        # Calling doctype() directly must emit a DeprecationWarning.
        parser = ET.XMLParser()
        with support.check_warnings(('', DeprecationWarning)):
            parser.doctype(
                'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        parser.feed('<html/>')
        parser.close()

    @python_only
    def test_subclass_doctype(self):
        xhtml = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')

        _doctype = []

        class MyParserWithDoctype(ET.XMLParser):
            # Records into whatever list ``_doctype`` is currently bound to.
            def doctype(self, name, pubid, system):
                _doctype.append((name, pubid, system))

        # A doctype() defined on the parser subclass is called, with a
        # DeprecationWarning.
        parser = MyParserWithDoctype()
        with support.check_warnings(('', DeprecationWarning)):
            parser.feed(self.sample2)
        parser.close()
        self.assertEqual(_doctype, [xhtml])

        # With a target that has its own doctype(), the target's method is
        # used instead and no DeprecationWarning may be raised.
        _doctype = []
        _doctype2 = []
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)

            class DoctypeParser:
                data = start = end = close = lambda *a: None

                def doctype(self, name, pubid, system):
                    _doctype2.append((name, pubid, system))

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertEqual(_doctype, [])
            self.assertEqual(_doctype2, [xhtml])
2241
2242
class NamespaceParseTest(unittest.TestCase):
    def test_find_with_namespace(self):
        # findall() with fully qualified names, passing a prefix map along.
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        # path -> number of elements expected
        expectations = [
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        ]
        for path, count in expectations:
            self.assertEqual(len(doc.findall(path, nsmap)), count)
2251
2252
class ElementSlicingTest(unittest.TestCase):
    # Indexing, slicing, slice deletion and slice assignment on Element.

    def _elem_tags(self, elemlist):
        # Tags of the given elements, in order.
        return [e.tag for e in elemlist]

    def _subelem_tags(self, elem):
        # Tags of the direct children of *elem*, in order.
        return self._elem_tags(list(elem))

    def _make_elem_with_children(self, numchildren):
        """Return an Element tagged 'a' holding *numchildren* children
        tagged 'a0', 'a1', ... in that order.

        """
        parent = ET.Element('a')
        for idx in range(numchildren):
            ET.SubElement(parent, 'a%s' % idx)
        return parent

    def _new_b_elems(self, count):
        # Fresh elements tagged 'b0', 'b1', ... used as replacements.
        return [ET.Element('b%s' % i) for i in range(count)]

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        with self.assertRaises(IndexError):
            e[12]
        with self.assertRaises(IndexError):
            e[-12]

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)
        tags = self._elem_tags

        self.assertEqual(tags(e[3:]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:6]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:16]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:5]), ['a3', 'a4'])
        self.assertEqual(tags(e[3:-1]), ['a3', 'a4'])
        self.assertEqual(tags(e[:2]), ['a0', 'a1'])

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)
        tags = self._elem_tags

        self.assertEqual(tags(e[8:10:1]), ['a8', 'a9'])
        self.assertEqual(tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
        self.assertEqual(tags(e[::8]), ['a0', 'a8'])
        self.assertEqual(tags(e[1::8]), ['a1', 'a9'])
        # huge steps select only the start element
        self.assertEqual(tags(e[3::sys.maxsize]), ['a3'])
        self.assertEqual(tags(e[3::sys.maxsize<<64]), ['a3'])

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        tags = self._elem_tags

        self.assertEqual(tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
        self.assertEqual(tags(e[::-2]), ['a3', 'a1'])
        # huge negative steps select only the start element
        self.assertEqual(tags(e[3::-sys.maxsize]), ['a3'])
        self.assertEqual(tags(e[3::-sys.maxsize-1]), ['a3'])
        self.assertEqual(tags(e[3::-sys.maxsize<<64]), ['a3'])

    def test_delslice(self):
        make = self._make_elem_with_children
        remaining = self._subelem_tags

        e = make(4)
        del e[0:2]
        self.assertEqual(remaining(e), ['a2', 'a3'])

        e = make(4)
        del e[0:]
        self.assertEqual(remaining(e), [])

        if ET is not pyET:
            return

        # Deleting extended (stepped) slices is only exercised against the
        # pure-Python implementation.
        e = make(4)
        del e[::-1]
        self.assertEqual(remaining(e), [])

        e = make(4)
        del e[::-2]
        self.assertEqual(remaining(e), ['a0', 'a2'])

        e = make(4)
        del e[1::2]
        self.assertEqual(remaining(e), ['a0', 'a2'])

        e = make(2)
        del e[::2]
        self.assertEqual(remaining(e), ['a1'])

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        after_c = ['a0', 'b', 'c', 'a3']
        self.assertEqual(self._subelem_tags(e), after_c)

        # Out-of-range assignments raise and leave the children untouched.
        with self.assertRaises(IndexError):
            e[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            e[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), after_c)

    def test_setslice_range(self):
        # same number of replacements as slots
        e = self._make_elem_with_children(4)
        e[1:3] = self._new_b_elems(2)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])

        # fewer replacements: the element shrinks
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])

        # more replacements: the element grows
        e = self._make_elem_with_children(4)
        e[1:3] = self._new_b_elems(3)
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        e = self._make_elem_with_children(6)
        e[1:5:2] = self._new_b_elems(2)
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts exactly as many items as it
        # selects; failed attempts leave the children untouched.
        e = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[1:5:2] = self._new_b_elems(3)
        with self.assertRaises(ValueError):
            e[1:5:2] = []
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        # huge steps select only the start element
        e = self._make_elem_with_children(4)
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::sys.maxsize<<64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        e[2:0:-1] = self._new_b_elems(2)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        # Size mismatches are rejected and leave the children untouched.
        e = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[2:0:-1] = self._new_b_elems(3)
        with self.assertRaises(ValueError):
            e[2:0:-1] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        # huge negative steps select only the start element
        e = self._make_elem_with_children(4)
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize-1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-sys.maxsize<<64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
2402
2403
class IOTest(unittest.TestCase):
    # Serialization encodings and reading/writing via files and file-likes.

    def tearDown(self):
        # Some tests write TESTFN; remove it after each test.
        support.unlink(TESTFN)

    def _check_serializations(self, elem, plain, utf8, ascii_, latin1):
        # Compare serialize(elem) under the default settings and under the
        # three explicit encodings with the expected strings.
        self.assertEqual(serialize(elem), plain)
        self.assertEqual(serialize(elem, encoding="utf-8"), utf8)
        self.assertEqual(serialize(elem, encoding="us-ascii"), ascii_)
        self.assertEqual(serialize(elem, encoding="iso-8859-1"), latin1)

    def test_encoding(self):
        # Test encoding issues.
        # Only iso-8859-1 output is expected to carry an XML declaration.
        latin1_decl = "<?xml version='1.0' encoding='iso-8859-1'?>\n"

        # plain ASCII text
        elem = ET.Element("tag")
        elem.text = u"abc"
        self._check_serializations(
            elem,
            '<tag>abc</tag>',
            '<tag>abc</tag>',
            '<tag>abc</tag>',
            latin1_decl + "<tag>abc</tag>")

        # markup characters in text
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self._check_serializations(
            elem,
            '<tag>&lt;&amp;"\'&gt;</tag>',
            b'<tag>&lt;&amp;"\'&gt;</tag>',
            b'<tag>&lt;&amp;"\'&gt;</tag>',
            latin1_decl + "<tag>&lt;&amp;\"'&gt;</tag>")

        # markup characters in an attribute value
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self._check_serializations(
            elem,
            '<tag key="&lt;&amp;&quot;\'&gt;" />',
            b'<tag key="&lt;&amp;&quot;\'&gt;" />',
            b'<tag key="&lt;&amp;&quot;\'&gt;" />',
            latin1_decl + "<tag key=\"&lt;&amp;&quot;'&gt;\" />")

        # non-ASCII characters in text
        elem = ET.Element("tag")
        elem.text = u'\xe5\xf6\xf6<>'
        self._check_serializations(
            elem,
            '<tag>&#229;&#246;&#246;&lt;&gt;</tag>',
            '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>',
            '<tag>&#229;&#246;&#246;&lt;&gt;</tag>',
            latin1_decl + "<tag>\xe5\xf6\xf6&lt;&gt;</tag>")

        # non-ASCII characters in an attribute value
        elem = ET.Element("tag")
        elem.attrib["key"] = u'\xe5\xf6\xf6<>'
        self._check_serializations(
            elem,
            '<tag key="&#229;&#246;&#246;&lt;&gt;" />',
            '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />',
            '<tag key="&#229;&#246;&#246;&lt;&gt;" />',
            latin1_decl + "<tag key=\"\xe5\xf6\xf6&lt;&gt;\" />")

    def test_write_to_filename(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as written:
            self.assertEqual(written.read(), b'''<site />''')

    def test_write_to_file(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as out:
            tree.write(out)
            # write() must not close a file object it was handed.
            self.assertFalse(out.closed)
        with open(TESTFN, 'rb') as written:
            self.assertEqual(written.read(), b'''<site />''')

    def test_read_from_stringio(self):
        source = StringIO.StringIO('''<?xml version="1.0"?><site></site>''')
        tree = ET.ElementTree()
        tree.parse(source)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        sink = StringIO.StringIO()
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(sink)
        self.assertEqual(sink.getvalue(), '''<site />''')

    class dummy:
        # Bare object onto which a single read or write method is grafted.
        pass

    def test_read_from_user_reader(self):
        source = StringIO.StringIO('''<?xml version="1.0"?><site></site>''')
        # An object exposing nothing but read() is enough for parse().
        reader = self.dummy()
        reader.read = source.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_writer(self):
        sink = StringIO.StringIO()
        # An object exposing nothing but write() is enough for write().
        writer = self.dummy()
        writer.write = sink.write
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(writer)
        self.assertEqual(sink.getvalue(), '''<site />''')

    def test_tostringlist_invariant(self):
        # Joining tostringlist() output must equal tostring() output.
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root),
            ''.join(ET.tostringlist(root)))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))
2520
2521
class ParseErrorTest(unittest.TestCase):
    """Checks on the ET.ParseError exception raised by ET.fromstring()."""

    def test_subclass(self):
        # ParseError instances must be catchable as SyntaxError.
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse *s* with ET.fromstring() and return the ParseError raised.

        Fails the calling test when parsing unexpectedly succeeds, instead
        of returning None and letting the caller crash with an unrelated
        AttributeError on ``.position`` or ``.code``.
        """
        try:
            ET.fromstring(s)
        except ET.ParseError as e:
            return e
        self.fail('ET.fromstring(%r) did not raise ParseError' % (s,))

    def test_error_position(self):
        # ``position`` is compared against the expected two-item tuple
        # for each malformed input.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    @python_only
    def test_error_code(self):
        # The numeric ``code`` must translate to expat's syntax-error text.
        from xml.parsers import expat
        self.assertEqual(expat.ErrorString(self._get_error('foo').code),
                         expat.errors.XML_ERROR_SYNTAX)
2542
2543
class KeywordArgsTest(unittest.TestCase):
    """Keyword-argument handling of the ET.Element constructor and methods."""

    def test_issue14818(self):
        # Positional and keyword spellings of the search methods must agree.
        root = ET.XML("<a>foo</a>")
        self.assertEqual(root.find('a', None),
                         root.find(path='a', namespaces=None))
        self.assertEqual(root.findtext('a', None, None),
                         root.findtext(path='a', default=None,
                                       namespaces=None))
        self.assertEqual(root.findall('a', None),
                         root.findall(path='a', namespaces=None))
        self.assertEqual(list(root.iterfind('a', None)),
                         list(root.iterfind(path='a', namespaces=None)))

        # No attributes given: attrib is an empty mapping.
        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of supplying the same attributes yields the same element.
        expected_attrib = dict(href="#", id="foo")
        elements = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        for element in elements:
            self.assertEqual(element.tag, 'a')
            self.assertEqual(element.attrib, expected_attrib)

        child = ET.SubElement(elements[0], 'foobar',
                              attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally or by keyword.
        not_a_dict = "I'm not a dict"
        expected_message = 'must be dict, not str'
        with self.assertRaisesRegexp(TypeError, expected_message):
            ET.Element('a', not_a_dict)
        with self.assertRaisesRegexp(TypeError, expected_message):
            ET.Element('a', attrib=not_a_dict)
Christian Heimes20d46692013-07-05 01:41:30 +02002577
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002578# --------------------------------------------------------------------
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002579
class NoAcceleratorTest(unittest.TestCase):
    """Check that pyET is the pure-Python implementation, not the C one."""

    def setUp(self):
        if ET is not pyET:
            raise unittest.SkipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        for cls in (pyET.Element, pyET.XMLParser):
            init = cls.__init__
            # On Python 2, looking a Python-level function up on its class
            # yields an unbound method rather than the function itself;
            # unwrap it through __func__ so the check inspects the real
            # implementation.  Objects without __func__ are left untouched.
            init = getattr(init, '__func__', init)
            self.assertIsInstance(init, types.FunctionType)
Ezio Melotti6d6fb3a2012-09-19 08:11:03 +03002592
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002593# --------------------------------------------------------------------
2594
2595
class CleanContext(object):
    """Provide default namespace mapping and path cache.

    Context manager that snapshots pyET's namespace map and the
    ElementPath cache on entry, restores both on exit, and wraps the
    body in support.check_warnings() with the filters built in __init__.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        """Build the warnings checker; *quiet* is passed to check_warnings()."""
        # Each entry is a (message regex, warning category) pair.  The
        # pattern containing regex escapes is a raw string so that the
        # backslashes are not treated as string-literal escapes.
        deprecations = (
            (r"This method of XMLParser is deprecated. Define doctype\(\) "
             r"method on the TreeBuilder target.", DeprecationWarning),
            # Search behaviour is broken if search path starts with "/".
            ("This search is broken in 1.3 and earlier, and will be fixed "
             "in a future version. If you rely on the current behaviour, "
             "change it to '.+'", FutureWarning),
            # Element.getchildren() and Element.getiterator() are deprecated.
            ("This method will be removed in future versions. "
             "Use .+ instead.", DeprecationWarning),
            ("This method will be removed in future versions. "
             "Use .+ instead.", PendingDeprecationWarning))
        self.checkwarnings = support.check_warnings(*deprecations, quiet=quiet)

    def __enter__(self):
        from xml.etree import ElementPath
        self._nsmap = pyET._namespace_map
        # Copy the default namespace mapping
        self._nsmap_copy = self._nsmap.copy()
        # Copy the path cache (should be empty)
        self._path_cache = ElementPath._cache
        ElementPath._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()
        # Return the manager so "with CleanContext() as ctx" binds it.
        return self

    def __exit__(self, *args):
        from xml.etree import ElementPath
        # Restore mapping and path cache.  The map is restored in place
        # (clear + update) so the object held by pyET stays the same.
        self._nsmap.clear()
        self._nsmap.update(self._nsmap_copy)
        ElementPath._cache = self._path_cache
        self.checkwarnings.__exit__(*args)
Florent Xicluna3e8c1892010-03-11 14:36:19 +00002632
2633
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    When invoked without a module, runs the Python ET tests using pyET.
    Otherwise, uses the given module as the ET.  The module-level ET is
    reset to None once the run finishes.
    """
    global ET
    ET = pyET if module is None else module
    using_other_module = ET is not pyET

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        BugsTest,
    ]

    # These tests will only run for the pure-Python version that doesn't
    # import _elementtree. We can't use skipUnless here, because pyET is
    # filled in only after the module is loaded.
    # NOTE(review): the class is scheduled only when ET is not pyET, which
    # is exactly the case in which NoAcceleratorTest.setUp raises SkipTest,
    # so as written its test appears never to execute -- confirm the
    # intended condition.
    if using_other_module:
        test_classes.append(NoAcceleratorTest)

    try:
        # XXX the C module should give the same warnings as the Python module
        with CleanContext(quiet=using_other_module):
            support.run_unittest(*test_classes)
    finally:
        # don't interfere with subsequent tests
        ET = None
2676
Armin Rigo9ed73062005-12-14 18:10:45 +00002677
# Run the suite against the pure-Python module when executed as a script.
if __name__ == "__main__":
    test_main()