blob: d87924513cfa60700cde3204d876962a455a5354 [file] [log] [blame]
# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
Florent Xiclunaf15351d2010-03-13 23:24:31 +00007
Serhiy Storchakad7a44152015-11-12 11:23:04 +02008import copy
Georg Brandl1f7fffb2010-10-15 15:57:45 +00009import html
Eli Benderskyf996e772012-03-16 05:53:30 +020010import io
Eli Bendersky698bdb22013-01-10 06:01:06 -080011import operator
Eli Bendersky7ec45f72012-12-30 06:17:49 -080012import pickle
Eli Bendersky0192ba32012-03-30 16:38:33 +030013import sys
Eli Benderskye26fa1b2013-05-19 17:49:54 -070014import types
Victor Stinner6c6f8512010-08-07 10:09:35 +000015import unittest
Serhiy Storchaka05744ac2015-06-29 22:35:58 +030016import warnings
Eli Benderskya5e82202012-03-31 13:55:38 +030017import weakref
Armin Rigo9ed73062005-12-14 18:10:45 +000018
Eli Bendersky698bdb22013-01-10 06:01:06 -080019from itertools import product
Benjamin Petersonee8712c2008-05-20 21:35:26 +000020from test import support
Eli Bendersky23687042013-02-26 05:53:23 -080021from test.support import TESTFN, findfile, import_fresh_module, gc_collect
Armin Rigo9ed73062005-12-14 18:10:45 +000022
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
#
# Both names are bound to None here; presumably they are rebound to real
# modules before any test runs -- confirm against the rest of the file.
pyET = None
ET = None

# Location of the plain sample document in the "xmltestdata" directory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Only the path string is encoded here; the file itself is not read.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Raised at import time, so the whole module is skipped.
    raise unittest.SkipTest("filename is not encodable to utf8")
# Location of the namespaced sample document in the same directory.
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
36
# Un-namespaced document: a <body> with two <tag> children and a <section>
# that holds one more <tag>.
SAMPLE_XML = """\
<body>
 <tag class='a'>text</tag>
 <tag class='b' />
 <section>
 <tag class='b' id='inner'>subtext</tag>
 </section>
</body>
"""

# Stand-alone <section> document with a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
 <tag class='b' id='inner'>subtext</tag>
 <nexttag />
 <nextsection>
 <tag />
 </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but placed in a default namespace.  Note the
# literal starts with a newline (no backslash after the opening quotes).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
 <tag>text</tag>
 <tag />
 <section>
 <tag>subtext</tag>
 </section>
</body>
"""

# Two sibling <table> elements that use different prefixed namespaces
# ("hello" bound to h:, "foo" bound to f:).
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
 <h:tr>
 <h:td>Apples</h:td>
 <h:td>Bananas</h:td>
 </h:tr>
</h:table>

<f:table xmlns:f="foo">
 <f:name>African Coffee Table</f:name>
 <f:width>80</f:width>
 <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DOCTYPE declares an external parameter entity and whose
# body references the general entity "entity".
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
Armin Rigo9ed73062005-12-14 18:10:45 +000091
Armin Rigo9ed73062005-12-14 18:10:45 +000092
class ModuleTest(unittest.TestCase):
    # TODO: this should be removed once we get rid of the global module vars

    def test_sanity(self):
        # Import sanity: the three xml.etree submodules must be importable.
        # The imported names are deliberately left unused.
        from xml.etree import ElementTree, ElementInclude, ElementPath
102
Armin Rigo9ed73062005-12-14 18:10:45 +0000103
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* via ET.ElementTree.write() into an in-memory buffer.

    A text buffer is used when *encoding* is 'unicode' and a bytes buffer
    otherwise.  Any extra keyword *options* are forwarded to write().

    Returns the buffer's contents when *to_string* is true; otherwise
    returns the buffer object itself, rewound to position 0.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
Armin Rigo9ed73062005-12-14 18:10:45 +0000116
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of each item in *seq*."""
    tags = []
    for item in seq:
        tags.append(item.tag)
    return tags
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000119
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000120
class ElementTestCase:
    """Mixin providing pickle round-trip and element comparison helpers."""

    @classmethod
    def setUpClass(cls):
        # A set: the two entries collapse into one when pyET and ET are equal.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        """Pickle *obj* with protocol *proto* and unpickle the result.

        sys.modules[name] is bound to *dumper* while dumping and to *loader*
        while loading.  Whatever was registered under *name* before the call
        is put back afterwards, whether or not an exception occurred.

        Raises support.TestFailed (chained to the original error) when a
        pickle.PicklingError occurs.
        """
        original_module = sys.modules[name]
        try:
            sys.modules[name] = dumper
            payload = pickle.dumps(obj, proto)
            sys.modules[name] = loader
            restored = pickle.loads(payload)
        except pickle.PicklingError as exc:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {}
            labels[ET] = "cET"
            labels[pyET] = "pyET"
            message = "Failed to round-trip %r from %r to %r" % (
                obj, labels.get(dumper, dumper), labels.get(loader, loader))
            raise support.TestFailed(message) from exc
        finally:
            sys.modules[name] = original_module
        return restored

    def assertEqualElements(self, alice, bob):
        """Assert that *alice* and *bob* are elements with equal content.

        Both must be instances of ET.Element or pyET.Element, have the same
        number of children, have pairwise-equal children (checked
        recursively), and agree on tag, tail, text and attrib.
        """
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        snapshot = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(snapshot(alice), snapshot(bob))
152
# --------------------------------------------------------------------
# element tree tests
Armin Rigo9ed73062005-12-14 18:10:45 +0000155
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200156class ElementTreeTest(unittest.TestCase):
Armin Rigo9ed73062005-12-14 18:10:45 +0000157
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200158 def serialize_check(self, elem, expected):
159 self.assertEqual(serialize(elem), expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000160
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200161 def test_interface(self):
162 # Test element tree interface.
Armin Rigo9ed73062005-12-14 18:10:45 +0000163
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200164 def check_string(string):
165 len(string)
166 for char in string:
167 self.assertEqual(len(char), 1,
168 msg="expected one-character string, got %r" % char)
169 new_string = string + ""
170 new_string = string + " "
171 string[:0]
Armin Rigo9ed73062005-12-14 18:10:45 +0000172
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200173 def check_mapping(mapping):
174 len(mapping)
175 keys = mapping.keys()
176 items = mapping.items()
177 for key in keys:
178 item = mapping[key]
179 mapping["key"] = "value"
180 self.assertEqual(mapping["key"], "value",
181 msg="expected value string, got %r" % mapping["key"])
Armin Rigo9ed73062005-12-14 18:10:45 +0000182
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200183 def check_element(element):
184 self.assertTrue(ET.iselement(element), msg="not an element")
185 self.assertTrue(hasattr(element, "tag"), msg="no tag member")
186 self.assertTrue(hasattr(element, "attrib"), msg="no attrib member")
187 self.assertTrue(hasattr(element, "text"), msg="no text member")
188 self.assertTrue(hasattr(element, "tail"), msg="no tail member")
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000189
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200190 check_string(element.tag)
191 check_mapping(element.attrib)
192 if element.text is not None:
193 check_string(element.text)
194 if element.tail is not None:
195 check_string(element.tail)
196 for elem in element:
197 check_element(elem)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000198
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200199 element = ET.Element("tag")
200 check_element(element)
201 tree = ET.ElementTree(element)
202 check_element(tree.getroot())
203 element = ET.Element("t\xe4g", key="value")
204 tree = ET.ElementTree(element)
205 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
206 element = ET.Element("tag", key="value")
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000207
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200208 # Make sure all standard element methods exist.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000209
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200210 def check_method(method):
211 self.assertTrue(hasattr(method, '__call__'),
212 msg="%s not callable" % method)
Armin Rigo9ed73062005-12-14 18:10:45 +0000213
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200214 check_method(element.append)
215 check_method(element.extend)
216 check_method(element.insert)
217 check_method(element.remove)
218 check_method(element.getchildren)
219 check_method(element.find)
220 check_method(element.iterfind)
221 check_method(element.findall)
222 check_method(element.findtext)
223 check_method(element.clear)
224 check_method(element.get)
225 check_method(element.set)
226 check_method(element.keys)
227 check_method(element.items)
228 check_method(element.iter)
229 check_method(element.itertext)
230 check_method(element.getiterator)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000231
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200232 # These methods return an iterable. See bug 6472.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000233
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200234 def check_iter(it):
235 check_method(it.__next__)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000236
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200237 check_iter(element.iterfind("tag"))
238 check_iter(element.iterfind("*"))
239 check_iter(tree.iterfind("tag"))
240 check_iter(tree.iterfind("*"))
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000241
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200242 # These aliases are provided:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000243
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200244 self.assertEqual(ET.XML, ET.fromstring)
245 self.assertEqual(ET.PI, ET.ProcessingInstruction)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000246
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200247 def test_simpleops(self):
248 # Basic method sanity checks.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000249
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200250 elem = ET.XML("<body><tag/></body>")
251 self.serialize_check(elem, '<body><tag /></body>')
252 e = ET.Element("tag2")
253 elem.append(e)
254 self.serialize_check(elem, '<body><tag /><tag2 /></body>')
255 elem.remove(e)
256 self.serialize_check(elem, '<body><tag /></body>')
257 elem.insert(0, e)
258 self.serialize_check(elem, '<body><tag2 /><tag /></body>')
259 elem.remove(e)
260 elem.extend([e])
261 self.serialize_check(elem, '<body><tag /><tag2 /></body>')
262 elem.remove(e)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000263
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200264 element = ET.Element("tag", key="value")
265 self.serialize_check(element, '<tag key="value" />') # 1
266 subelement = ET.Element("subtag")
267 element.append(subelement)
268 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
269 element.insert(0, subelement)
270 self.serialize_check(element,
271 '<tag key="value"><subtag /><subtag /></tag>') # 3
272 element.remove(subelement)
273 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
274 element.remove(subelement)
275 self.serialize_check(element, '<tag key="value" />') # 5
276 with self.assertRaises(ValueError) as cm:
277 element.remove(subelement)
278 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
279 self.serialize_check(element, '<tag key="value" />') # 6
280 element[0:0] = [subelement, subelement, subelement]
281 self.serialize_check(element[1], '<subtag />')
282 self.assertEqual(element[1:9], [element[1], element[2]])
283 self.assertEqual(element[:9:2], [element[0], element[2]])
284 del element[1:2]
285 self.serialize_check(element,
286 '<tag key="value"><subtag /><subtag /></tag>')
Florent Xiclunaa72a98f2012-02-13 11:03:30 +0100287
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200288 def test_cdata(self):
289 # Test CDATA handling (etc).
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000290
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200291 self.serialize_check(ET.XML("<tag>hello</tag>"),
292 '<tag>hello</tag>')
293 self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
294 '<tag>hello</tag>')
295 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
296 '<tag>hello</tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000297
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200298 def test_file_init(self):
299 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
300 tree = ET.ElementTree(file=stringfile)
301 self.assertEqual(tree.find("tag").tag, 'tag')
302 self.assertEqual(tree.find("section/tag").tag, 'tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000303
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200304 tree = ET.ElementTree(file=SIMPLE_XMLFILE)
305 self.assertEqual(tree.find("element").tag, 'element')
306 self.assertEqual(tree.find("element/../empty-element").tag,
307 'empty-element')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000308
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200309 def test_path_cache(self):
310 # Check that the path cache behaves sanely.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000311
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200312 from xml.etree import ElementPath
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000313
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200314 elem = ET.XML(SAMPLE_XML)
315 for i in range(10): ET.ElementTree(elem).find('./'+str(i))
316 cache_len_10 = len(ElementPath._cache)
317 for i in range(10): ET.ElementTree(elem).find('./'+str(i))
318 self.assertEqual(len(ElementPath._cache), cache_len_10)
319 for i in range(20): ET.ElementTree(elem).find('./'+str(i))
320 self.assertGreater(len(ElementPath._cache), cache_len_10)
321 for i in range(600): ET.ElementTree(elem).find('./'+str(i))
322 self.assertLess(len(ElementPath._cache), 500)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000323
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200324 def test_copy(self):
325 # Test copy handling (etc).
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000326
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200327 import copy
328 e1 = ET.XML("<tag>hello<foo/></tag>")
329 e2 = copy.copy(e1)
330 e3 = copy.deepcopy(e1)
331 e1.find("foo").tag = "bar"
332 self.serialize_check(e1, '<tag>hello<bar /></tag>')
333 self.serialize_check(e2, '<tag>hello<bar /></tag>')
334 self.serialize_check(e3, '<tag>hello<foo /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000335
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200336 def test_attrib(self):
337 # Test attribute handling.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000338
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200339 elem = ET.Element("tag")
340 elem.get("key") # 1.1
341 self.assertEqual(elem.get("key", "default"), 'default') # 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000342
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200343 elem.set("key", "value")
344 self.assertEqual(elem.get("key"), 'value') # 1.3
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000345
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200346 elem = ET.Element("tag", key="value")
347 self.assertEqual(elem.get("key"), 'value') # 2.1
348 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000349
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200350 attrib = {"key": "value"}
351 elem = ET.Element("tag", attrib)
352 attrib.clear() # check for aliasing issues
353 self.assertEqual(elem.get("key"), 'value') # 3.1
354 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000355
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200356 attrib = {"key": "value"}
357 elem = ET.Element("tag", **attrib)
358 attrib.clear() # check for aliasing issues
359 self.assertEqual(elem.get("key"), 'value') # 4.1
360 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000361
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200362 elem = ET.Element("tag", {"key": "other"}, key="value")
363 self.assertEqual(elem.get("key"), 'value') # 5.1
364 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000365
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200366 elem = ET.Element('test')
367 elem.text = "aa"
368 elem.set('testa', 'testval')
369 elem.set('testb', 'test2')
370 self.assertEqual(ET.tostring(elem),
371 b'<test testa="testval" testb="test2">aa</test>')
372 self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
373 self.assertEqual(sorted(elem.items()),
374 [('testa', 'testval'), ('testb', 'test2')])
375 self.assertEqual(elem.attrib['testb'], 'test2')
376 elem.attrib['testb'] = 'test1'
377 elem.attrib['testc'] = 'test2'
378 self.assertEqual(ET.tostring(elem),
379 b'<test testa="testval" testb="test1" testc="test2">aa</test>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000380
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200381 def test_makeelement(self):
382 # Test makeelement handling.
Antoine Pitroub86680e2010-10-14 21:15:17 +0000383
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200384 elem = ET.Element("tag")
385 attrib = {"key": "value"}
386 subelem = elem.makeelement("subtag", attrib)
387 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
388 elem.append(subelem)
389 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000390
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200391 elem.clear()
392 self.serialize_check(elem, '<tag />')
393 elem.append(subelem)
394 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
395 elem.extend([subelem, subelem])
396 self.serialize_check(elem,
397 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
398 elem[:] = [subelem]
399 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
400 elem[:] = tuple([subelem])
401 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000402
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200403 def test_parsefile(self):
404 # Test parsing from file.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000405
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200406 tree = ET.parse(SIMPLE_XMLFILE)
407 stream = io.StringIO()
408 tree.write(stream, encoding='unicode')
409 self.assertEqual(stream.getvalue(),
410 '<root>\n'
411 ' <element key="value">text</element>\n'
412 ' <element>text</element>tail\n'
413 ' <empty-element />\n'
414 '</root>')
415 tree = ET.parse(SIMPLE_NS_XMLFILE)
416 stream = io.StringIO()
417 tree.write(stream, encoding='unicode')
418 self.assertEqual(stream.getvalue(),
419 '<ns0:root xmlns:ns0="namespace">\n'
420 ' <ns0:element key="value">text</ns0:element>\n'
421 ' <ns0:element>text</ns0:element>tail\n'
422 ' <ns0:empty-element />\n'
423 '</ns0:root>')
Armin Rigo9ed73062005-12-14 18:10:45 +0000424
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200425 with open(SIMPLE_XMLFILE) as f:
426 data = f.read()
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000427
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200428 parser = ET.XMLParser()
429 self.assertRegex(parser.version, r'^Expat ')
430 parser.feed(data)
431 self.serialize_check(parser.close(),
432 '<root>\n'
433 ' <element key="value">text</element>\n'
434 ' <element>text</element>tail\n'
435 ' <empty-element />\n'
436 '</root>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000437
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200438 target = ET.TreeBuilder()
439 parser = ET.XMLParser(target=target)
440 parser.feed(data)
441 self.serialize_check(parser.close(),
442 '<root>\n'
443 ' <element key="value">text</element>\n'
444 ' <element>text</element>tail\n'
445 ' <empty-element />\n'
446 '</root>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000447
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200448 def test_parseliteral(self):
449 element = ET.XML("<html><body>text</body></html>")
450 self.assertEqual(ET.tostring(element, encoding='unicode'),
451 '<html><body>text</body></html>')
452 element = ET.fromstring("<html><body>text</body></html>")
453 self.assertEqual(ET.tostring(element, encoding='unicode'),
454 '<html><body>text</body></html>')
455 sequence = ["<html><body>", "text</bo", "dy></html>"]
456 element = ET.fromstringlist(sequence)
457 self.assertEqual(ET.tostring(element),
458 b'<html><body>text</body></html>')
459 self.assertEqual(b"".join(ET.tostringlist(element)),
460 b'<html><body>text</body></html>')
461 self.assertEqual(ET.tostring(element, "ascii"),
462 b"<?xml version='1.0' encoding='ascii'?>\n"
463 b"<html><body>text</body></html>")
464 _, ids = ET.XMLID("<html><body>text</body></html>")
465 self.assertEqual(len(ids), 0)
466 _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
467 self.assertEqual(len(ids), 1)
468 self.assertEqual(ids["body"].tag, 'body')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000469
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200470 def test_iterparse(self):
471 # Test iterparse interface.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000472
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200473 iterparse = ET.iterparse
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000474
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200475 context = iterparse(SIMPLE_XMLFILE)
476 action, elem = next(context)
477 self.assertEqual((action, elem.tag), ('end', 'element'))
478 self.assertEqual([(action, elem.tag) for action, elem in context], [
479 ('end', 'element'),
480 ('end', 'empty-element'),
481 ('end', 'root'),
482 ])
483 self.assertEqual(context.root.tag, 'root')
484
485 context = iterparse(SIMPLE_NS_XMLFILE)
486 self.assertEqual([(action, elem.tag) for action, elem in context], [
487 ('end', '{namespace}element'),
488 ('end', '{namespace}element'),
489 ('end', '{namespace}empty-element'),
490 ('end', '{namespace}root'),
491 ])
492
493 events = ()
494 context = iterparse(SIMPLE_XMLFILE, events)
495 self.assertEqual([(action, elem.tag) for action, elem in context], [])
496
497 events = ()
498 context = iterparse(SIMPLE_XMLFILE, events=events)
499 self.assertEqual([(action, elem.tag) for action, elem in context], [])
500
501 events = ("start", "end")
502 context = iterparse(SIMPLE_XMLFILE, events)
503 self.assertEqual([(action, elem.tag) for action, elem in context], [
504 ('start', 'root'),
505 ('start', 'element'),
506 ('end', 'element'),
507 ('start', 'element'),
508 ('end', 'element'),
509 ('start', 'empty-element'),
510 ('end', 'empty-element'),
511 ('end', 'root'),
512 ])
513
514 events = ("start", "end", "start-ns", "end-ns")
515 context = iterparse(SIMPLE_NS_XMLFILE, events)
Eli Bendersky23687042013-02-26 05:53:23 -0800516 self.assertEqual([(action, elem.tag) if action in ("start", "end")
517 else (action, elem)
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200518 for action, elem in context], [
519 ('start-ns', ('', 'namespace')),
520 ('start', '{namespace}root'),
521 ('start', '{namespace}element'),
522 ('end', '{namespace}element'),
523 ('start', '{namespace}element'),
524 ('end', '{namespace}element'),
525 ('start', '{namespace}empty-element'),
526 ('end', '{namespace}empty-element'),
527 ('end', '{namespace}root'),
528 ('end-ns', None),
529 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000530
Eli Bendersky5dd40e52013-11-28 06:31:58 -0800531 events = ('start-ns', 'end-ns')
532 context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
533 res = [action for action, elem in context]
534 self.assertEqual(res, ['start-ns', 'end-ns'])
535
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200536 events = ("start", "end", "bogus")
537 with self.assertRaises(ValueError) as cm:
538 with open(SIMPLE_XMLFILE, "rb") as f:
539 iterparse(f, events)
540 self.assertEqual(str(cm.exception), "unknown event 'bogus'")
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000541
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200542 source = io.BytesIO(
543 b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
544 b"<body xmlns='http://&#233;ffbot.org/ns'\n"
545 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
546 events = ("start-ns",)
547 context = iterparse(source, events)
548 self.assertEqual([(action, elem) for action, elem in context], [
549 ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
550 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
551 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000552
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200553 source = io.StringIO("<document />junk")
554 it = iterparse(source)
555 action, elem = next(it)
556 self.assertEqual((action, elem.tag), ('end', 'document'))
557 with self.assertRaises(ET.ParseError) as cm:
558 next(it)
559 self.assertEqual(str(cm.exception),
560 'junk after document element: line 1, column 12')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000561
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200562 def test_writefile(self):
563 elem = ET.Element("tag")
564 elem.text = "text"
565 self.serialize_check(elem, '<tag>text</tag>')
566 ET.SubElement(elem, "subtag").text = "subtext"
567 self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000568
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200569 # Test tag suppression
570 elem.tag = None
571 self.serialize_check(elem, 'text<subtag>subtext</subtag>')
572 elem.insert(0, ET.Comment("comment"))
573 self.serialize_check(elem,
574 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000575
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200576 elem[0] = ET.PI("key", "value")
577 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000578
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200579 def test_custom_builder(self):
580 # Test parser w. custom builder.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000581
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200582 with open(SIMPLE_XMLFILE) as f:
583 data = f.read()
584 class Builder(list):
585 def start(self, tag, attrib):
586 self.append(("start", tag))
587 def end(self, tag):
588 self.append(("end", tag))
589 def data(self, text):
590 pass
591 builder = Builder()
592 parser = ET.XMLParser(target=builder)
593 parser.feed(data)
594 self.assertEqual(builder, [
595 ('start', 'root'),
596 ('start', 'element'),
597 ('end', 'element'),
598 ('start', 'element'),
599 ('end', 'element'),
600 ('start', 'empty-element'),
601 ('end', 'empty-element'),
602 ('end', 'root'),
603 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000604
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200605 with open(SIMPLE_NS_XMLFILE) as f:
606 data = f.read()
607 class Builder(list):
608 def start(self, tag, attrib):
609 self.append(("start", tag))
610 def end(self, tag):
611 self.append(("end", tag))
612 def data(self, text):
613 pass
614 def pi(self, target, data):
615 self.append(("pi", target, data))
616 def comment(self, data):
617 self.append(("comment", data))
618 builder = Builder()
619 parser = ET.XMLParser(target=builder)
620 parser.feed(data)
621 self.assertEqual(builder, [
622 ('pi', 'pi', 'data'),
623 ('comment', ' comment '),
624 ('start', '{namespace}root'),
625 ('start', '{namespace}element'),
626 ('end', '{namespace}element'),
627 ('start', '{namespace}element'),
628 ('end', '{namespace}element'),
629 ('start', '{namespace}empty-element'),
630 ('end', '{namespace}empty-element'),
631 ('end', '{namespace}root'),
632 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000633
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000634
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200635 def test_getchildren(self):
636 # Test Element.getchildren()
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000637
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200638 with open(SIMPLE_XMLFILE, "rb") as f:
639 tree = ET.parse(f)
640 self.assertEqual([summarize_list(elem.getchildren())
641 for elem in tree.getroot().iter()], [
642 ['element', 'element', 'empty-element'],
643 [],
644 [],
645 [],
646 ])
647 self.assertEqual([summarize_list(elem.getchildren())
648 for elem in tree.getiterator()], [
649 ['element', 'element', 'empty-element'],
650 [],
651 [],
652 [],
653 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000654
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200655 elem = ET.XML(SAMPLE_XML)
656 self.assertEqual(len(elem.getchildren()), 3)
657 self.assertEqual(len(elem[2].getchildren()), 1)
658 self.assertEqual(elem[:], elem.getchildren())
659 child1 = elem[0]
660 child2 = elem[2]
661 del elem[1:2]
662 self.assertEqual(len(elem.getchildren()), 2)
663 self.assertEqual(child1, elem[0])
664 self.assertEqual(child2, elem[1])
665 elem[0:2] = [child2, child1]
666 self.assertEqual(child2, elem[0])
667 self.assertEqual(child1, elem[1])
668 self.assertNotEqual(child1, elem[0])
669 elem.clear()
670 self.assertEqual(elem.getchildren(), [])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000671
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200672 def test_writestring(self):
673 elem = ET.XML("<html><body>text</body></html>")
674 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
675 elem = ET.fromstring("<html><body>text</body></html>")
676 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000677
def test_encoding(self):
    # Test which encodings named in the XML declaration the parser
    # accepts, and which exception is raised for the ones it rejects.

    def check(encoding, body=''):
        # The document is parsed twice: once as bytes encoded with the
        # declared encoding and once as str.  Both must yield *body*.
        xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
               (encoding, body))
        self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
        self.assertEqual(ET.XML(xml).text, body)
    check("ascii", 'a')
    check("us-ascii", 'a')
    check("iso-8859-1", '\xbd')
    check("iso-8859-15", '\u20ac')
    check("cp437", '\u221a')
    check("mac-roman", '\u02da')

    def xml(encoding):
        # Minimal document declaring *encoding*, as str.
        return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    def bxml(encoding):
        # Same document, encoded to bytes with the declared encoding.
        return xml(encoding).encode(encoding)
    supported_encodings = [
        'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
        'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
        'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
        'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
        'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
        'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
        'mac-roman', 'mac-turkish',
        'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
        'iso2022-jp-3', 'iso2022-jp-ext',
        'koi8-r', 'koi8-u',
        'hz', 'ptcp154',
    ]
    # subTest keeps the loop going after a failure and reports which
    # encoding was responsible.
    for encoding in supported_encodings:
        with self.subTest(encoding=encoding):
            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))),
                             b'<xml />')

    unsupported_ascii_compatible_encodings = [
        'big5', 'big5hkscs',
        'cp932', 'cp949', 'cp950',
        'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
        'gb2312', 'gbk', 'gb18030',
        'iso2022-kr', 'johab',
        'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
        'utf-7',
    ]
    for encoding in unsupported_ascii_compatible_encodings:
        with self.subTest(encoding=encoding):
            self.assertRaises(ValueError, ET.XML, bxml(encoding))

    unsupported_ascii_incompatible_encodings = [
        'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
        'utf_32', 'utf_32_be', 'utf_32_le',
    ]
    for encoding in unsupported_ascii_incompatible_encodings:
        with self.subTest(encoding=encoding):
            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))

    # Declared encoding names that are not usable codecs.
    self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
    self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
736
def test_methods(self):
    # Test serialization methods.

    e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    e.tail = "\n"
    # The default, method=None and method="xml" all give the same output.
    self.assertEqual(serialize(e),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    self.assertEqual(serialize(e, method=None),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    self.assertEqual(serialize(e, method="xml"),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    # "html": <link> is written without a closing slash and the script
    # content is not escaped.
    self.assertEqual(serialize(e, method="html"),
            '<html><link><script>1 < 2</script></html>\n')
    # "text": only the text content and the tail are written.
    self.assertEqual(serialize(e, method="text"), '1 < 2\n')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000751
def test_issue18347(self):
    # A mixed-case tag name must be written unchanged by both the
    # default and the "html" serialization methods.
    e = ET.XML('<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e),
            '<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e, method="html"),
            '<html><CamelCase>text</CamelCase></html>')
758
def test_entity(self):
    # Test entity handling.

    # 1) good entities

    # A numeric character reference is decoded on parse; when written as
    # us-ascii it comes back as a decimal reference (0x8230 == 33328).
    e = ET.XML("<document title='&#x8230;'>test</document>")
    self.assertEqual(serialize(e, encoding="us-ascii"),
            b'<document title="&#33328;">test</document>')
    self.serialize_check(e, '<document title="\u8230">test</document>')

    # 2) bad entities

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML("<document>&entity;</document>")
    self.assertEqual(str(cm.exception),
            'undefined entity: line 1, column 10')

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML(ENTITY_XML)
    self.assertEqual(str(cm.exception),
            'undefined entity &entity;: line 5, column 10')

    # 3) custom entity

    # Registering the entity on the parser makes the same document parse.
    parser = ET.XMLParser()
    parser.entity["entity"] = "text"
    parser.feed(ENTITY_XML)
    root = parser.close()
    self.serialize_check(root, '<document>text</document>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000788
def test_namespace(self):
    # Test namespace issues.

    # 1) xml namespace

    elem = ET.XML("<tag xml:lang='en' />")
    self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1

    # 2) other "well-known" namespaces

    elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    self.serialize_check(elem,
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1

    elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    self.serialize_check(elem,
        '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2

    # Unlike 2.1 and 2.2, this URI's prefix is not kept: the output uses
    # a generated "ns0" prefix.
    elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    self.serialize_check(elem,
        '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3

    # 3) unknown namespaces
    elem = ET.XML(SAMPLE_XML_NS)
    self.serialize_check(elem,
        '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
        ' <ns0:tag>text</ns0:tag>\n'
        ' <ns0:tag />\n'
        ' <ns0:section>\n'
        ' <ns0:tag>subtext</ns0:tag>\n'
        ' </ns0:section>\n'
        '</ns0:body>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000821
def test_qname(self):
    # Test QName handling.

    # 1) decorated tags

    elem = ET.Element("{uri}tag")
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
    elem = ET.Element(ET.QName("{uri}tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
    elem = ET.Element(ET.QName("uri", "tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
    elem = ET.Element(ET.QName("uri", "tag"))
    # The created children are only reached through elem, so the return
    # values are not kept.
    ET.SubElement(elem, ET.QName("uri", "tag1"))
    ET.SubElement(elem, ET.QName("uri", "tag2"))
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4

    # 2) decorated attributes

    elem.clear()
    elem.attrib["{uri}key"] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1

    elem.clear()
    elem.attrib[ET.QName("{uri}key")] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2

    # 3) decorated values are not converted by default, but the
    # QName wrapper can be used for values

    elem.clear()
    elem.attrib["{uri}key"] = "{uri}value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1

    elem.clear()
    elem.attrib["{uri}key"] = ET.QName("{uri}value")
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2

    # The same child object is appended twice on purpose; it is written
    # twice and all three namespaces are declared on the root.
    elem.clear()
    subelem = ET.Element("tag")
    subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    elem.append(subelem)
    elem.append(subelem)
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
        '<tag ns1:key="ns2:value" />'
        '<tag ns1:key="ns2:value" />'
        '</ns0:tag>') # 3.3

    # 4) Direct QName tests

    self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
    self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
    q1 = ET.QName('ns', 'tag')
    q2 = ET.QName('ns', 'tag')
    self.assertEqual(q1, q2)
    q2 = ET.QName('ns', 'other-tag')
    self.assertNotEqual(q1, q2)
    # A QName compares equal to a plain string only in "{uri}local" form.
    self.assertNotEqual(q1, 'ns:tag')
    self.assertEqual(q1, '{ns}tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000886
def test_doctype_public(self):
    # Test PUBLIC doctype.

    elem = ET.XML('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text</html>')
    # Parsing must succeed, and the doctype must not disturb the root
    # element that follows it.
    self.assertEqual(elem.tag, 'html')
    self.assertEqual(elem.text, 'text')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000894
def test_xpath_tokenizer(self):
    # Test the XPath tokenizer.
    from xml.etree import ElementPath
    def check(p, expected):
        # Each token is an (op, tag) pair; keep whichever half is
        # non-empty so a path can be compared with a flat list.
        self.assertEqual([op or tag
                          for op, tag in ElementPath.xpath_tokenizer(p)],
                         expected)

    # tests from the xml specification
    check("*", ['*'])
    check("text()", ['text', '()'])
    check("@name", ['@', 'name'])
    check("@*", ['@', '*'])
    check("para[1]", ['para', '[', '1', ']'])
    check("para[last()]", ['para', '[', 'last', '()', ']'])
    check("*/para", ['*', '/', 'para'])
    check("/doc/chapter[5]/section[2]",
          ['/', 'doc', '/', 'chapter', '[', '5', ']',
           '/', 'section', '[', '2', ']'])
    check("chapter//para", ['chapter', '//', 'para'])
    check("//para", ['//', 'para'])
    check("//olist/item", ['//', 'olist', '/', 'item'])
    check(".", ['.'])
    check(".//para", ['.', '//', 'para'])
    check("..", ['..'])
    check("../@lang", ['..', '/', '@', 'lang'])
    check("chapter[title]", ['chapter', '[', 'title', ']'])
    # The '' entries are tokens whose op and tag are both empty.
    check("employee[@secretary and @assistant]", ['employee',
          '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])

    # additional tests
    check("{http://spam}egg", ['{http://spam}egg'])
    check("./spam.egg", ['.', '/', 'spam.egg'])
    check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000929
def test_processinginstruction(self):
    # Test ProcessingInstruction directly

    # ET.PI and ET.ProcessingInstruction give the same result.
    self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
            b'<?test instruction?>')
    self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
            b'<?test instruction?>')

    # Issue #2746

    # Markup characters in the PI text are written as-is, not escaped.
    self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
            b'<?test <testing&>?>')
    self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
            b"<?xml version='1.0' encoding='latin-1'?>\n"
            b"<?test <testing&>\xe3?>")
Antoine Pitrou99f69ee2010-02-09 17:25:47 +0000945
def test_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
                    'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
        # Each name is checked in upper and lower case.
        for elem in [element, element.lower()]:
            # subTest reports which element spelling failed and lets the
            # remaining ones still run.
            with self.subTest(elem=elem):
                expected = '<%s>' % elem
                # Both the self-closing and the open/close spelling must
                # be written as a bare start tag in "html" mode.
                serialized = serialize(ET.XML('<%s />' % elem), method='html')
                self.assertEqual(serialized, expected)
                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
                                       method='html')
                self.assertEqual(serialized, expected)
Antoine Pitrou99f69ee2010-02-09 17:25:47 +0000958
Fredrik Lundh8911ca3d2005-12-16 22:07:17 +0000959
class XMLPullParserTest(unittest.TestCase):
    # Tests for ET.XMLPullParser: data is pushed in with feed() and the
    # resulting events are pulled out with read_events().

    def _feed(self, parser, data, chunk_size=None):
        # Feed *data* to *parser*, either in a single call (chunk_size is
        # None) or in consecutive slices of at most chunk_size characters.
        if chunk_size is None:
            parser.feed(data)
        else:
            for i in range(0, len(data), chunk_size):
                parser.feed(data[i:i+chunk_size])

    def assert_event_tags(self, parser, expected):
        # Consume the pending events and compare them, reduced to
        # (action, tag) pairs, with *expected*.
        events = parser.read_events()
        self.assertEqual([(action, elem.tag) for action, elem in events],
                         expected)

    def test_simple_xml(self):
        # The same event sequence is expected whether the input arrives
        # whole or in 1- or 5-character chunks.
        for chunk_size in (None, 1, 5):
            with self.subTest(chunk_size=chunk_size):
                parser = ET.XMLPullParser()
                self.assert_event_tags(parser, [])
                self._feed(parser, "<!-- comment -->\n", chunk_size)
                self.assert_event_tags(parser, [])
                self._feed(parser,
                           "<root>\n <element key='value'>text</element",
                           chunk_size)
                # The end tag is not complete yet, so no event is reported.
                self.assert_event_tags(parser, [])
                self._feed(parser, ">\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'element')])
                self._feed(parser, "<element>text</element>tail\n", chunk_size)
                self._feed(parser, "<empty-element/>\n", chunk_size)
                self.assert_event_tags(parser, [
                    ('end', 'element'),
                    ('end', 'empty-element'),
                    ])
                self._feed(parser, "</root>\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'root')])
                self.assertIsNone(parser.close())

    def test_feed_while_iterating(self):
        # An iterator obtained before any data was fed must still yield
        # events produced by later feed() calls.
        parser = ET.XMLPullParser()
        it = parser.read_events()
        self._feed(parser, "<root>\n <element key='value'>text</element>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self._feed(parser, "</root>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'root'))
        with self.assertRaises(StopIteration):
            next(it)

    def test_simple_xml_with_ns(self):
        # Same flow as test_simple_xml, with a default namespace on the
        # root: reported tags carry the "{namespace}" prefix.
        parser = ET.XMLPullParser()
        self.assert_event_tags(parser, [])
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', '{namespace}element')])
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self.assert_event_tags(parser, [
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ])
        self._feed(parser, "</root>\n")
        self.assert_event_tags(parser, [('end', '{namespace}root')])
        self.assertIsNone(parser.close())

    def test_ns_events(self):
        # With only namespace events requested, element events are not
        # reported at all.
        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
        self._feed(parser, "<!-- comment -->\n")
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assertEqual(
            list(parser.read_events()),
            [('start-ns', ('', 'namespace'))])
        self._feed(parser, "<element key='value'>text</element")
        self._feed(parser, ">\n")
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self._feed(parser, "</root>\n")
        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
        self.assertIsNone(parser.close())

    def test_events(self):
        # An empty events tuple means nothing is reported.
        parser = ET.XMLPullParser(events=())
        self._feed(parser, "<root/>\n")
        self.assert_event_tags(parser, [])

        parser = ET.XMLPullParser(events=('start', 'end'))
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', 'element')])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ('end', '{foo}empty-element'),
            ('end', '{foo}element'),
            ])
        self._feed(parser, "</root>")
        # close() is deliberately called before the last read: the final
        # 'end' event must still be readable afterwards.
        self.assertIsNone(parser.close())
        self.assert_event_tags(parser, [('end', 'root')])

        parser = ET.XMLPullParser(events=('start',))
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ])
        self._feed(parser, "</root>")
        self.assertIsNone(parser.close())

    def test_events_sequence(self):
        # Test that events can be any iterable, not just a tuple or list:
        # first a set, then a one-shot custom iterator.
        eventset = {'end', 'start'}
        parser = ET.XMLPullParser(events=eventset)
        self._feed(parser, "<foo>bar</foo>")
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

        class DummyIter:
            # Minimal iterator over three event names.
            def __init__(self):
                self.events = iter(['start', 'end', 'start-ns'])
            def __iter__(self):
                return self
            def __next__(self):
                return next(self.events)

        parser = ET.XMLPullParser(events=DummyIter())
        self._feed(parser, "<foo>bar</foo>")
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

    def test_unknown_event(self):
        # An unrecognised event name is rejected at construction time.
        with self.assertRaises(ValueError):
            ET.XMLPullParser(events=('start', 'end', 'bogus'))
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001112
1113
#
# xinclude tests (samples from appendix C of the XInclude specification)
1116
# In-memory resources for the XInclude tests, keyed by the href used to
# request them.  Values are XML documents or plain text.
XINCLUDE = {}

XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
 <p>The opinions represented herein represent those of the individual
 and should not be interpreted as official policy endorsed by this
 organization.</p>
</disclaimer>
"""

XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been accessed
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

# Variant of C2 in which the include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been <em>accessed</em>
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source of the "data.xml" resource:</p>
 <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
 <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:include href="example.txt" parse="text">
 <xi:fallback>
 <xi:include href="fallback-example.txt" parse="text">
 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
 </xi:include>
 </xi:fallback>
 </xi:include>
</div>
"""

# The href is the path of the sample file on disk, escaped (including
# quotes) so it is safe inside the attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>Example.</p>
 <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))
Armin Rigo9ed73062005-12-14 18:10:45 +00001189
#
# badly formatted xi:include tags
1192
# Documents with malformed XInclude markup.
XINCLUDE_BAD = {}

# xi:include with an unrecognised parse="..." value.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not the child of an xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:fallback></xi:fallback>
</div>
"""
1209
class XIncludeTest(unittest.TestCase):
    # Tests for xml.etree.ElementInclude, driven by the in-memory
    # XINCLUDE / XINCLUDE_BAD documents.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader backed by the XINCLUDE dict.  Returns a parsed element
        # when parse == "xml", otherwise the stored string unchanged.
        # Raises OSError for an href that is not in the dict.
        try:
            data = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        if parse == "xml":
            data = ET.XML(data)
        return data

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def _my_loader(self, href, parse, encoding=None):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        #
        # *encoding* is accepted, and ignored, so the signature matches
        # the other loaders in this class.
        if parse == 'xml':
            with open(href, 'rb') as f:
                return ET.parse(f).getroot()
        else:
            return None

    def test_xinclude_default(self):
        # Include a real file from disk (SIMPLE_XMLFILE) via _my_loader.
        from xml.etree import ElementInclude
        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        # NOTE(review): the whitespace in this expected text has to match
        # the sample file on disk; the literals are kept as found.
        self.assertEqual(serialize(doc),
            '<document>\n'
            ' <p>Example.</p>\n'
            ' <root>\n'
            ' <element key="value">text</element>\n'
            ' <element>text</element>tail\n'
            ' <empty-element />\n'
            '</root>\n'
            '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        document = self.xinclude_loader("C1.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>120 Mz is adequate for an average home user.</p>\n'
            ' <disclaimer>\n'
            ' <p>The opinions represented herein represent those of the individual\n'
            ' and should not be interpreted as official policy endorsed by this\n'
            ' organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        document = self.xinclude_loader("C2.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>This document has been accessed\n'
            ' 324387 times.</p>\n'
            '</document>') # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        document = self.xinclude_loader("C2b.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>This document has been <em>accessed</em>\n'
            ' 324387 times.</p>\n'
            '</document>') # C2b

        # Textual inclusion of XML example (XInclude C.3)
        document = self.xinclude_loader("C3.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>The following is the source of the "data.xml" resource:</p>\n'
            " <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            ' &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>') # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception), 'resource not found')
        # The failed include must leave the document untouched.
        self.assertEqual(serialize(document),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            ' <ns0:include href="example.txt" parse="text">\n'
            ' <ns0:fallback>\n'
            ' <ns0:include href="fallback-example.txt" parse="text">\n'
            ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            ' </ns0:include>\n'
            ' </ns0:fallback>\n'
            ' </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # Test failure to locate included XML file.
        document = ET.XML(XINCLUDE["C1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'disclaimer.xml' as 'xml'")

        # Test failure to locate included text file.
        document = ET.XML(XINCLUDE["C2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'count.txt' as 'text'")

        # Test bad parse type.
        document = ET.XML(XINCLUDE_BAD["B1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "unknown parse type in xi:include tag ('BAD_TYPE')")

        # Test xi:fallback outside xi:include.
        document = ET.XML(XINCLUDE_BAD["B2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "xi:fallback tag must be child of xi:include "
                "('{http://www.w3.org/2001/XInclude}fallback')")
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342
# --------------------------------------------------------------------
# reported bugs
1345
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001346class BugsTest(unittest.TestCase):
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001347
def test_bug_xmltoolkit21(self):
    # marshaller gives obscure errors for non-string values

    def check(elem):
        # Serializing must fail with a TypeError naming the bad value.
        with self.assertRaises(TypeError) as cm:
            serialize(elem)
        self.assertEqual(str(cm.exception),
                'cannot serialize 123 (type int)')

    # The int 123 is placed in each position a string is expected.
    elem = ET.Element(123)
    check(elem) # tag

    elem = ET.Element("elem")
    elem.text = 123
    check(elem) # text

    elem = ET.Element("elem")
    elem.tail = 123
    check(elem) # tail

    elem = ET.Element("elem")
    elem.set(123, "123")
    check(elem) # attribute key

    elem = ET.Element("elem")
    elem.set("123", 123)
    check(elem) # attribute value
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001375
def test_bug_xmltoolkit25(self):
    # typo in ElementTree.findtext

    # findtext() is called on the ElementTree wrapper, not on the root
    # element, for both a direct child and a nested path.
    elem = ET.XML(SAMPLE_XML)
    tree = ET.ElementTree(elem)
    self.assertEqual(tree.findtext("tag"), 'text')
    self.assertEqual(tree.findtext("section/tag"), 'subtext')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001383
def test_bug_xmltoolkit28(self):
    # .//tag causes exceptions

    # A descendant search must return an empty list when nothing matches
    # and the matching element when something does.
    tree = ET.XML("<doc><table><tbody/></table></doc>")
    self.assertEqual(summarize_list(tree.findall(".//thead")), [])
    self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001390
def test_bug_xmltoolkitX1(self):
    # dump() doesn't flush the output buffer
    root = ET.XML("<doc><table><tbody/></table></doc>")
    with support.captured_stdout() as captured:
        ET.dump(root)
        written = captured.getvalue()
        # dump() writes the serialized tree followed by a newline.
        self.assertEqual(written, '<doc><table><tbody /></table></doc>\n')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001398
def test_bug_xmltoolkit39(self):
    # non-ASCII element and attribute names did not work
    latin1_decl = b"<?xml version='1.0' encoding='iso-8859-1'?>"

    # Non-ASCII tag name parsed from a Latin-1 document.
    parsed = ET.XML(latin1_decl + b"<t\xe4g />")
    self.assertEqual(ET.tostring(parsed, "utf-8"), b'<t\xc3\xa4g />')

    # Non-ASCII attribute name, with a character reference in the value.
    parsed = ET.XML(latin1_decl + b"<tag \xe4ttr='v&#228;lue' />")
    self.assertEqual(parsed.attrib, {'\xe4ttr': 'v\xe4lue'})
    self.assertEqual(ET.tostring(parsed, "utf-8"),
                     b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    # Non-ASCII tag name on an element with text content.
    parsed = ET.XML(latin1_decl + b'<t\xe4g>text</t\xe4g>')
    self.assertEqual(ET.tostring(parsed, "utf-8"),
                     b'<t\xc3\xa4g>text</t\xc3\xa4g>')

    # Same names, but built through the API instead of parsed.
    built = ET.Element("t\u00e4g")
    self.assertEqual(ET.tostring(built, "utf-8"), b'<t\xc3\xa4g />')

    built = ET.Element("tag")
    built.set("\u00e4ttr", "v\u00e4lue")
    self.assertEqual(ET.tostring(built, "utf-8"),
                     b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423
def test_bug_xmltoolkit54(self):
    # problems handling internally defined entities
    source = ("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
              '<doc>&ldots;</doc>')
    root = ET.XML(source)
    # ASCII output needs a numeric character reference for the entity text.
    as_ascii = serialize(root, encoding="us-ascii")
    self.assertEqual(as_ascii, b'<doc>&#33328;</doc>')
    # Default serialization keeps the character itself.
    as_text = serialize(root)
    self.assertEqual(as_text, '<doc>\u8230</doc>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001432
def test_bug_xmltoolkit55(self):
    # make sure we're reporting the first error, not the last
    #
    # The document references three undefined entities; the error message
    # must name the first one.
    document = (b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                b'<doc>&ldots;&ndots;&rdots;</doc>')
    with self.assertRaises(ET.ParseError) as caught:
        ET.XML(document)
    self.assertEqual(str(caught.exception),
                     'undefined entity &ldots;: line 1, column 36')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001441
def test_bug_xmltoolkit60(self):
    # Handle crash in stream source.

    class ExceptionFile:
        # File-like source whose read() always fails.
        def read(self, x):
            raise OSError

    # The OSError raised by the source must propagate out of ET.parse().
    with self.assertRaises(OSError):
        ET.parse(ExceptionFile())
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001450
def test_bug_xmltoolkit62(self):
    # Don't crash when using custom entities.
    document = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""

    parser = ET.XMLParser()
    # Register the two quote entities the document uses.
    parser.entity.update({'rsquo': '\u2019', 'lsquo': '\u2018'})
    parser.feed(document)
    root = parser.close()

    paragraph = root.find('.//paragraph')
    self.assertEqual(
        paragraph.text,
        'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001467
def test_bug_xmltoolkit63(self):
    # Check reference leak.
    def build_once():
        builder = ET.TreeBuilder()
        builder.start("tag", {})
        builder.data("text")
        builder.end("tag")

    # One call is made before the baseline is taken; the refcount of None
    # must then be unchanged after 1000 further calls.
    build_once()
    baseline = sys.getrefcount(None)
    for _ in range(1000):
        build_once()
    self.assertEqual(sys.getrefcount(None), baseline)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001481
def test_bug_200708_newline(self):
    # Preserve newlines in attributes.
    attr_value = "def _f():\n return 3\n"
    serialized = b'<SomeTag text="def _f():&#10; return 3&#10;" />'

    elem = ET.Element('SomeTag', text=attr_value)
    # Newlines are written as character references ...
    self.assertEqual(ET.tostring(elem), serialized)
    # ... so they survive a parse ...
    reparsed = ET.XML(ET.tostring(elem))
    self.assertEqual(reparsed.get("text"), attr_value)
    # ... and a full serialize/parse/serialize round trip.
    round_tripped = ET.tostring(ET.XML(ET.tostring(elem)))
    self.assertEqual(round_tripped, serialized)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492
def test_bug_200708_close(self):
    document = "<element>some text</element>"

    # Test default builder.
    default_parser = ET.XMLParser()  # default
    default_parser.feed(document)
    root = default_parser.close()
    self.assertEqual(root.tag, 'element')

    # Test custom builder.
    class EchoTarget:
        def close(self):
            return ET.Element("element")  # simulate root

    custom_parser = ET.XMLParser(EchoTarget())
    custom_parser.feed(document)
    # close() must hand back whatever the target's close() returned.
    root = custom_parser.close()
    self.assertEqual(root.tag, 'element')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001506
def test_bug_200709_default_namespace(self):
    # 1: every element is in the default namespace.
    root = ET.Element("{default}elem")
    ET.SubElement(root, "{default}elem")
    output = serialize(root, default_namespace="default")  # 1
    self.assertEqual(output, '<elem xmlns="default"><elem /></elem>')

    # 2: a child in another namespace gets a generated prefix.
    root = ET.Element("{default}elem")
    ET.SubElement(root, "{default}elem")
    ET.SubElement(root, "{not-default}elem")
    output = serialize(root, default_namespace="default")  # 2
    self.assertEqual(output,
                     '<elem xmlns="default" xmlns:ns1="not-default">'
                     '<elem />'
                     '<ns1:elem />'
                     '</elem>')

    # 3: an unqualified child cannot be written with a default namespace.
    root = ET.Element("{default}elem")
    ET.SubElement(root, "{default}elem")
    ET.SubElement(root, "elem")  # unprefixed name
    with self.assertRaises(ValueError) as caught:
        serialize(root, default_namespace="default")  # 3
    self.assertEqual(
        str(caught.exception),
        'cannot use non-qualified names with default_namespace option')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001529
def test_bug_200709_register_namespace(self):
    # Before registration the namespace is written with a generated prefix.
    title = ET.Element("{http://namespace.invalid/does/not/exist/}title")
    before = ET.tostring(title)
    self.assertEqual(
        before,
        b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')

    # After registration the chosen prefix is used instead.
    ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    title = ET.Element("{http://namespace.invalid/does/not/exist/}title")
    after = ET.tostring(title)
    self.assertEqual(
        after,
        b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

    # And the Dublin Core namespace is in the default list:
    dc_title = ET.Element("{http://purl.org/dc/elements/1.1/}title")
    self.assertEqual(
        ET.tostring(dc_title),
        b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001544
def test_bug_200709_element_comment(self):
    # Not sure if this can be fixed, really (since the serializer needs
    # ET.Comment, not cET.comment).
    #
    # An appended comment or processing instruction must report its
    # factory function as its tag.
    for factory in (ET.Comment, ET.PI):
        parent = ET.Element('a')
        parent.append(factory('foo'))
        self.assertEqual(parent[0].tag, factory)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001556
def test_bug_200709_element_insert(self):
    parent = ET.Element('a')
    ET.SubElement(parent, 'b')
    ET.SubElement(parent, 'c')
    extra = ET.Element('d')

    # Index 0 puts the new element first.
    parent.insert(0, extra)
    self.assertEqual(summarize_list(parent), ['d', 'b', 'c'])

    # Index -1 puts it before the last child; the same element is now
    # listed twice.
    parent.insert(-1, extra)
    self.assertEqual(summarize_list(parent), ['d', 'b', 'd', 'c'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001566
def test_bug_200709_iter_comment(self):
    # iter(ET.Comment) must find a comment nested below a child element.
    root = ET.Element('a')
    child = ET.SubElement(root, 'b')
    child.append(ET.Comment("TEST-b"))
    found = summarize_list(root.iter(ET.Comment))
    self.assertEqual(found, [ET.Comment])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001573
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001574 # --------------------------------------------------------------------
1575 # reported on bugs.python.org
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001576
def test_bug_1534630(self):
    # Data is fed to the builder before any element has been started;
    # the closed tree must serialize as an empty <tag />.
    builder = ET.TreeBuilder()
    builder.data("data")
    builder.start("tag", {})
    builder.end("tag")
    root = builder.close()
    self.assertEqual(serialize(root), '<tag />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001584
def test_issue6233(self):
    # The same non-ASCII character, read from a UTF-8 and from a Latin-1
    # document, must be written as one character reference in ASCII output.
    expected = (b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')
    documents = (
        (b"<?xml version='1.0' encoding='utf-8'?>"
         b'<body>t\xc3\xa3g</body>'),
        (b"<?xml version='1.0' encoding='iso-8859-1'?>"
         b'<body>t\xe3g</body>'),
    )
    for document in documents:
        body = ET.XML(document)
        self.assertEqual(ET.tostring(body, 'ascii'), expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001596
def test_issue3151(self):
    # A namespace URI that itself contains braces.
    root = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    self.assertEqual(root.tag, '{${stuff}}localname')
    # The original builds an ElementTree here and never uses the result;
    # the call is kept as is.
    ET.ElementTree(root)
    serialized = ET.tostring(root)
    self.assertEqual(serialized, b'<ns0:localname xmlns:ns0="${stuff}" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001602
def test_issue6565(self):
    # Replacing all children through slice assignment.
    target = ET.XML("<body><tag/></body>")
    self.assertEqual(summarize_list(target), ['tag'])

    donor = ET.XML(SAMPLE_XML)
    target[:] = donor[:]
    self.assertEqual(summarize_list(target), ['tag', 'tag', 'section'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001609
def test_issue10777(self):
    # Registering a namespace twice caused a "dictionary changed size during
    # iteration" bug.
    for _ in range(2):
        ET.register_namespace('test10777', 'http://myuri/')
Georg Brandl90b20672010-12-28 10:38:33 +00001616
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001617
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001618# --------------------------------------------------------------------
1619
1620
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Basic Element behaviour: type checks on mutation, collection of
    # reference cycles, weak references, keyword arguments to get(), and
    # pickling between the modules listed in self.modules.

    def test_augmentation_type_errors(self):
        # Only Element instances may be added as children.
        e = ET.Element('joe')
        self.assertRaises(TypeError, e.append, 'b')
        self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
        self.assertRaises(TypeError, e.insert, 0, 'foo')

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e1.append(e2)
        # This used to be e2.append(e2), which left e1 outside any cycle,
        # so the cycle collector was never exercised for it.
        e2.append(e3)
        e3.append(e1)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        flag = False

        def wref_cb(w):
            nonlocal flag
            flag = True

        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        # Do not rely on immediate reference-count finalisation.
        gc_collect()
        self.assertEqual(flag, True)
        self.assertEqual(wref(), None)

    def test_get_keyword_args(self):
        # get() must accept 'default' as a keyword argument.
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            # Every (dumper, loader) pairing of the available modules.
            for dumper, loader in product(self.modules, repeat=2):
                e = dumper.Element('foo', bar=42)
                e.text = "text goes here"
                e.tail = "opposite of head"
                dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
                e.append(dumper.Element('child'))
                e.findall('.//grandchild')[0].set('attr', 'other value')

                e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                          dumper, loader, proto)

                self.assertEqual(e2.tag, 'foo')
                self.assertEqual(e2.attrib['bar'], 42)
                self.assertEqual(len(e2), 2)
                self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
                e1 = dumper.fromstring(XMLTEXT)
                # Only checked where the element exposes __getstate__.
                if hasattr(e1, '__getstate__'):
                    self.assertEqual(e1.__getstate__()['tag'], 'group')
                e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                          dumper, loader, proto)
                self.assertEqual(e2.tag, 'group')
                self.assertEqual(e2[0].tag, 'dogs')
Eli Benderskydd3661e2013-09-13 06:24:25 -07001712
Eli Bendersky23687042013-02-26 05:53:23 -08001713
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Element methods called with arguments that mutate the container
    # while the method is running.  The tests run the calls and check only
    # the outcomes asserted below; a TypeError from extend() is tolerated.

    def test_extend_mutable_list(self):
        class X:
            # Reading __class__ replaces the contents of the list that is
            # being passed to extend().
            @property
            def __class__(self):
                items[:] = [ET.Element('baz')]
                return ET.Element

        items = [X()]
        parent = ET.Element('foo')
        try:
            parent.extend(items)
        except TypeError:
            pass

        # Same property, but on a real Element subclass.
        class Y(X, ET.Element):
            pass

        items = [Y('x')]
        parent = ET.Element('foo')
        parent.extend(items)

    def test_extend_mutable_list2(self):
        class X:
            # Reading __class__ empties the list that is being passed to
            # extend().
            @property
            def __class__(self):
                del items[:]
                return ET.Element

        items = [X(), ET.Element('baz')]
        parent = ET.Element('foo')
        try:
            parent.extend(items)
        except TypeError:
            pass

        # Same property, but on a real Element subclass.
        class Y(X, ET.Element):
            pass

        items = [Y('bar'), ET.Element('baz')]
        parent = ET.Element('foo')
        parent.extend(items)

    def test_remove_with_mutating(self):
        class X(ET.Element):
            # Comparing empties the parent element and reports "not equal".
            def __eq__(self, o):
                del parent[:]
                return False

        # The mutating element is the child being compared against.
        parent = ET.Element('foo')
        parent.extend([X('bar')])
        with self.assertRaises(ValueError):
            parent.remove(ET.Element('baz'))

        # The mutating element is the argument to remove().
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        with self.assertRaises(ValueError):
            parent.remove(X('baz'))
1765
1766
class MutatingElementPath(str):
    # A str subclass whose equality check deletes every item of the
    # container it was created with and then reports equality.

    # Defining __eq__ would otherwise make the class unhashable; keep the
    # plain str hash.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = str.__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, other):
        del self.elem[:]
        return True
1776
class BadElementPath(str):
    # A str subclass whose equality check always fails with
    # ZeroDivisionError.

    # Defining __eq__ would otherwise make the class unhashable; keep the
    # plain str hash.
    __hash__ = str.__hash__

    def __eq__(self, other):
        # Evaluating 1/0 raises before the raise statement itself runs.
        raise 1/0
1781
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() called with path objects whose
    # __eq__ either empties the element or raises.  The tests only run the
    # calls; a ZeroDivisionError is tolerated in the *_with_error cases.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Swap in an empty path cache for the duration of each test.
        self.path_cache = ElementPath._cache
        ElementPath._cache = {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def _parent_with_child(self):
        # Build <foo> holding a single <bar> child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._parent_with_child()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        parent = self._parent_with_child()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        parent = self._parent_with_child()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
1832
1833
class ElementTreeTypeTest(unittest.TestCase):
    # The public ElementTree names must be real classes, and Element must
    # be subclassable.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for cls in public_classes:
            self.assertIsInstance(cls, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        self.assertIsInstance(instance, ET.Element)
        self.assertIsInstance(instance, MyElement)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        class MyElement(ET.Element):
            # Appends '__' to the tag and forwards everything else.
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        # Attributes from the dict and from keywords are merged.
        attributes = sorted(instance.items())
        self.assertEqual(attributes,
                         [('a', 1), ('b', 2), ('c', 3), ('d', 4)])

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')
Eli Benderskyda578192012-02-16 06:52:39 +02001873
Eli Benderskyceab1a92013-01-12 07:42:46 -08001874
class ElementFindTest(unittest.TestCase):
    # find(), findtext() and findall() on Element and ElementTree,
    # including predicates, namespaces and invalid paths.

    def test_find_simple(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(e.find('tag').tag, 'tag')
        self.assertEqual(e.find('section/tag').tag, 'tag')
        self.assertEqual(e.find('./tag').tag, 'tag')

        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(e.findtext('./tag'), 'text')
        self.assertEqual(e.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(e.findtext('section/nexttag'), '')
        self.assertEqual(e.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(e.findtext('tog'))
        self.assertEqual(e.findtext('tog', 'default'), 'default')

        # Issue #16922
        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        e = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        valid = (
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        )
        for path, expected_class in valid:
            self.assertEqual(e.find(path).attrib['class'], expected_class,
                             path)

        # Positions below 1 and offsets that are not negative are rejected.
        invalid = ('./tag[0]', './tag[-1]', './tag[last()-0]',
                   './tag[last()+1]')
        for path in invalid:
            self.assertRaisesRegex(SyntaxError, 'XPath', e.find, path)

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)

        # (path, expected summary); the path doubles as the failure message.
        cases = (
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', ['tag', 'nexttag', 'nextsection']),
            ('section//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/*', ['tag', 'nexttag', 'nextsection']),
            ('*//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
            # predicates
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            # parent steps
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        )
        for path, expected in cases:
            self.assertEqual(summarize_list(e.findall(path)), expected, path)

        # A trailing '//' behaves like '//*'.
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

    def test_test_find_with_ns(self):
        e = ET.XML(SAMPLE_XML_NS)
        # An unqualified name does not match namespaced elements.
        self.assertEqual(summarize_list(e.findall('tag')), [])
        self.assertEqual(
            summarize_list(e.findall("{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 2)
        self.assertEqual(
            summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 3)

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # The same prefix is mapped to a different URI in each pass.
        nsmap = {'xx': 'X'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        nsmap = {'xx': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)

    def test_bad_find(self):
        e = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
            e.findall('/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
                         ['tag'] * 2)
        # this produces a warning; capture it so it does not leak into the
        # test run's output.
        # NOTE(review): assumes the warning category is FutureWarning --
        # confirm against ElementTree.findall.
        with self.assertWarns(FutureWarning):
            found = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(found), ['tag'] * 3)
Eli Benderskyc31f7732013-01-12 07:44:32 -08002003
Eli Benderskyceab1a92013-01-12 07:42:46 -08002004
class ElementIterTest(unittest.TestCase):
    # Element.iter(), Element.itertext() and iterparse(), plus copying and
    # pickling of the iterator object.

    def _ilist(self, elem, tag=None):
        # Summary of everything elem.iter(tag) yields.
        return summarize_list(elem.iter(tag))

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = doc.find('body')
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(body.itertext()), 'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(sourcefile))
        self.assertEqual(first_event[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        first_event = next(ET.iterparse(sourcefile, parser=parser))
        self.assertEqual(first_event[0], 'end')

        # A tree without a root cannot be iterated.
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        child = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(child, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        for tag, count in (('room', 3), ('house', 2)):
            self.assertEqual(self._ilist(doc, tag), [tag] * count)

        # test that iter also accepts 'tag' as a keyword arg
        by_keyword = summarize_list(doc.iter(tag='room'))
        self.assertEqual(by_keyword, ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    def test_copy(self):
        # The iterator returned by iter() must refuse to be copied.
        iterator = ET.Element('a').iter()
        with self.assertRaises(TypeError):
            copy.copy(iterator)

    def test_pickle(self):
        # The iterator returned by iter() must refuse to be pickled, under
        # every protocol.
        iterator = ET.Element('a').iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(iterator, proto)
2098
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099
class TreeBuilderTest(unittest.TestCase):
    """Exercise XMLParser with TreeBuilder and TreeBuilder-like targets."""

    # Document with a DOCTYPE declaration, root text, one child and a tail.
    sample1 = (
        '<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>'
    )

    # Document consisting of a single element.
    sample2 = '''<toplevel>sometext</toplevel>'''

    def _feed_and_close(self, target, data):
        """Parse *data* with an XMLParser driving *target*.

        Returns whatever the parser's close() returns.
        """
        parser = ET.XMLParser(target=target)
        parser.feed(data)
        return parser.close()

    def _check_sample1_element(self, e):
        """Assert that *e* is the tree expected from parsing ``sample1``."""
        root_expectations = (
            ('tag', 'html'),
            ('text', 'text'),
            ('tail', None),
            ('attrib', {}),
        )
        for attr_name, wanted in root_expectations:
            self.assertEqual(getattr(e, attr_name), wanted)

        kids = list(e)
        self.assertEqual(len(kids), 1)
        only_child = kids[0]
        child_expectations = (
            ('tag', 'div'),
            ('text', 'subtext'),
            ('tail', 'tail'),
            ('attrib', {}),
        )
        for attr_name, wanted in child_expectations:
            self.assertEqual(getattr(only_child, attr_name), wanted)

    def test_dummy_builder(self):
        class BaseDummyBuilder:
            def close(self):
                return 42

        def _ignore(*a):
            return None

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = _ignore

        # Targets that define close() have its result passed through,
        # whether or not they also define the event callbacks.
        for target_cls in (DummyBuilder, BaseDummyBuilder):
            result = self._feed_and_close(target_cls(), self.sample1)
            self.assertEqual(result, 42)

        # A target with no callbacks at all is accepted; close() gives None.
        self.assertIsNone(self._feed_and_close(object(), self.sample1))

    def test_treebuilder_elementfactory_none(self):
        builder = ET.TreeBuilder(element_factory=None)
        root = self._feed_and_close(builder, self.sample1)
        self._check_sample1_element(root)

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder()
        self.assertEqual(builder.foobar(10), 20)

        root = self._feed_and_close(builder, self.sample1)
        self._check_sample1_element(root)

    def test_element_factory(self):
        # Record every tag the factory is asked to create.
        created_tags = []

        def myfactory(tag, attrib):
            created_tags.append(tag)
            return ET.Element(tag, attrib)

        builder = ET.TreeBuilder(element_factory=myfactory)
        self._feed_and_close(builder, self.sample2)

        self.assertEqual(created_tags, ['toplevel'])

    def _check_element_factory_class(self, cls):
        """Build ``sample1`` with *cls* as element factory and verify it."""
        builder = ET.TreeBuilder(element_factory=cls)
        root = self._feed_and_close(builder, self.sample1)
        self.assertIsInstance(root, cls)
        self._check_sample1_element(root)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass

        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        py_element = ET._Element_Py
        # Not from a C extension
        self.assertEqual(py_element.__module__, 'xml.etree.ElementTree')

        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(py_element, ValueError):
            pass

        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        reported = self._feed_and_close(DoctypeParser(), self.sample1)
        wanted = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        self.assertEqual(reported, wanted)
2218
Eli Bendersky175fada2012-06-15 08:37:08 +03002219
class XMLParserTest(unittest.TestCase):
    """Tests for XMLParser construction, subclassing and doctype handling."""

    # Small bytes document whose parsed shape _check_sample_element verifies.
    sample1 = b'<file><line>22</line></file>'
    # Bytes document carrying a DOCTYPE declaration.
    sample2 = (
        b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>'
    )
    # str document with non-ASCII characters in an attribute and in text.
    sample3 = (
        '<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'
    )

    def _check_sample_element(self, e):
        """Assert that *e* is the tree expected from parsing ``sample1``."""
        self.assertEqual(e.tag, 'file')
        first_child = e[0]
        self.assertEqual(first_child.tag, 'line')
        self.assertEqual(first_child.text, '22')

    def test_constructor_args(self):
        # Positional args. The first (html) is not supported, but should be
        # nevertheless correctly accepted.
        positional = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
        positional.feed(self.sample1)
        self._check_sample_element(positional.close())

        # Now as keyword args.
        keyword = ET.XMLParser(
            encoding='utf-8',
            html=[{}],
            target=ET.TreeBuilder(),
        )
        keyword.feed(self.sample1)
        self._check_sample_element(keyword.close())

    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        instance = MyParser()
        instance.feed(self.sample1)
        self._check_sample_element(instance.close())

    def test_doctype_warning(self):
        html_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )

        # Calling doctype() directly is expected to warn.
        parser = ET.XMLParser()
        with self.assertWarns(DeprecationWarning):
            parser.doctype(*html_doctype)
        parser.feed('<html/>')
        parser.close()

        # Merely parsing a document that has a DOCTYPE must not warn;
        # DeprecationWarning is turned into an error to prove it.
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        html_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        seen_by_parser = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, name, pubid, system):
                nonlocal seen_by_parser
                seen_by_parser = (name, pubid, system)

        # A parser subclass overriding doctype() gets called, with a
        # DeprecationWarning emitted while feeding.
        parser = MyParserWithDoctype()
        with self.assertWarns(DeprecationWarning):
            parser.feed(self.sample2)
        parser.close()
        self.assertEqual(seen_by_parser, html_doctype)

        # When the target defines doctype(), the target's method is used:
        # the parser's override is not called and nothing is deprecated.
        seen_by_parser = seen_by_target = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal seen_by_target
                    seen_by_target = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(seen_by_parser)
            self.assertEqual(seen_by_target, html_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            instance = MyParserWithoutDoctype()
            instance.feed(self.sample2)
            instance.close()

    def test_parse_string(self):
        # Feeding str (rather than bytes) input must keep the non-ASCII
        # characters intact in both attribute values and text.
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        money = parser.close()
        self.assertEqual(money.tag, 'money')
        self.assertEqual(money.attrib['value'], '$\xa3\u20ac\U0001017b')
        self.assertEqual(money.text, '$\xa3\u20ac\U0001017b')
2317
Eli Bendersky52467b12012-06-01 07:13:08 +03002318
class NamespaceParseTest(unittest.TestCase):
    """Check findall() on a namespaced document with a prefix mapping."""

    def test_find_with_namespace(self):
        prefixes = {'h': 'hello', 'f': 'foo'}
        root = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        # (path, number of elements findall() is expected to return)
        expected_counts = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expected_counts:
            self.assertEqual(len(root.findall(path, prefixes)), count)
2327
2328
class ElementSlicingTest(unittest.TestCase):
    """Indexing, slicing and slice deletion on an Element's children."""

    def _elem_tags(self, elemlist):
        """Return the tags of the elements in *elemlist*, in order."""
        tags = []
        for item in elemlist:
            tags.append(item.tag)
        return tags

    def _subelem_tags(self, elem):
        """Return the tags of the direct children of *elem*, in order."""
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Build an Element tagged 'a' holding *numchildren* children.

        The children are tagged 'a0', 'a1', ... in insertion order.
        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a%s' % index)
        return parent

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        # An index past the last child is an error.
        with self.assertRaises(IndexError):
            e[12]

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)

        cases = (
            (slice(3, None), ['a3', 'a4', 'a5']),
            (slice(3, 6), ['a3', 'a4', 'a5']),
            (slice(3, 16), ['a3', 'a4', 'a5']),
            (slice(3, 5), ['a3', 'a4']),
            (slice(3, -1), ['a3', 'a4']),
            (slice(None, 2), ['a0', 'a1']),
        )
        for window, wanted in cases:
            self.assertEqual(self._elem_tags(e[window]), wanted)

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)

        cases = (
            (slice(8, 10, 1), ['a8', 'a9']),
            (slice(None, None, 3), ['a0', 'a3', 'a6', 'a9']),
            (slice(None, None, 8), ['a0', 'a8']),
            (slice(1, None, 8), ['a1', 'a9']),
        )
        for window, wanted in cases:
            self.assertEqual(self._elem_tags(e[window]), wanted)

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)

        cases = (
            (slice(None, None, -1), ['a3', 'a2', 'a1', 'a0']),
            (slice(None, None, -2), ['a3', 'a1']),
        )
        for window, wanted in cases:
            self.assertEqual(self._elem_tags(e[window]), wanted)

    def test_delslice(self):
        # (children to create, slice to delete, tags expected to remain);
        # each case starts from a freshly built element.
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for count, doomed, remaining in cases:
            e = self._make_elem_with_children(count)
            del e[doomed]
            self.assertEqual(self._subelem_tags(e), remaining)
2402
Eli Benderskyf996e772012-03-16 05:53:30 +02002403
class IOTest(unittest.TestCase):
    """Serialization and parsing through files, streams and user objects."""

    def tearDown(self):
        # Several tests write to TESTFN; remove it after every test.
        support.unlink(TESTFN)

    def test_encoding(self):
        # Test encoding issues.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        # utf-8 and us-ascii output carries no XML declaration.
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                                 b'<tag>abc</tag>')
                self.assertEqual(serialize(elem, encoding=enc.upper()),
                                 b'<tag>abc</tag>')
        # Other encodings get a declaration naming the encoding exactly
        # as the caller spelled it.
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                                 ("<?xml version='1.0' encoding='%s'?>\n"
                                  "<tag>abc</tag>" % enc).encode(enc))
                upper = enc.upper()
                self.assertEqual(serialize(elem, encoding=upper),
                                 ("<?xml version='1.0' encoding='%s'?>\n"
                                  "<tag>abc</tag>" % upper).encode(enc))

        # Markup characters in text.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        # Markup characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))

        # Non-ASCII characters in text.
        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            # The expected text is spelled with escapes so that it cannot be
            # damaged by a mis-decoded source file.
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag>\xe5\xf6\xf6&lt;&gt;</tag>" % enc).encode(enc))

        # Non-ASCII characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                             ("<?xml version='1.0' encoding='%s'?>\n"
                              "<tag key=\"\xe5\xf6\xf6&lt;&gt;\" />" % enc).encode(enc))

    def test_write_to_filename(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_text_file(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            # write() must leave a caller-supplied file open.
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            # write() must leave a caller-supplied file open.
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                             '''<?xml version='1.0' encoding='utf-16'?>\n'''
                             '''<site />'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                             '''<?xml version='1.0' encoding='utf-16'?>\n'''
                             '''<site />'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    # Attribute-less object; the tests below attach only the methods they
    # want a user-defined reader or writer to expose.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        # Besides write(), this writer also exposes seekable() and tell().
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                         '''<?xml version='1.0' encoding='utf-16'?>\n'''
                         '''<site />'''.encode("utf-16"))

    def test_tostringlist_invariant(self):
        # Joining tostringlist() output must equal tostring() output.
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            ''.join(ET.tostringlist(root, 'unicode')))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))

    def test_short_empty_elements(self):
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        # Short empty-element form is the default ...
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            '<tag>a<x />b<y />c</tag>')
        # ... and can be switched off explicitly.
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            '<tag>a<x></x>b<y></y>c</tag>')
2610
Eli Benderskyf996e772012-03-16 05:53:30 +02002611
class ParseErrorTest(unittest.TestCase):
    """Tests for the ParseError exception raised on malformed XML."""

    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse the malformed document *s* and return the ParseError.

        The calling test fails with a clear message if parsing *s* does not
        raise ET.ParseError, instead of receiving None and tripping over an
        unrelated AttributeError later on.
        """
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        # position is compared against a (line, column) style pair.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                         ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
2631
2632
class KeywordArgsTest(unittest.TestCase):
    """Keyword-argument handling of ET.Element and its lookup methods."""

    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")

        # Each lookup method must give the same answer whether its
        # arguments are passed positionally or by keyword.
        positional = root.find('a', None)
        keyword = root.find(path='a', namespaces=None)
        self.assertEqual(positional, keyword)

        positional = root.findtext('a', None, None)
        keyword = root.findtext(path='a', default=None, namespaces=None)
        self.assertEqual(positional, keyword)

        positional = root.findall('a', None)
        keyword = root.findall(path='a', namespaces=None)
        self.assertEqual(positional, keyword)

        positional = list(root.iterfind('a', None))
        keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(positional, keyword)

        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of supplying the attributes must yield the same element.
        elements = [
            ET.Element('a', {'href': '#', 'id': 'foo'}),
            ET.Element('a', attrib={'href': '#', 'id': 'foo'}),
            ET.Element('a', {'href': '#'}, id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', {'href': '#', 'id': 'foo'}, href="#", id="foo"),
        ]
        for element in elements:
            self.assertEqual(element.tag, 'a')
            self.assertEqual(element.attrib, {'href': '#', 'id': 'foo'})

        child = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally or by keyword.
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', "I'm not a dict")
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', attrib="I'm not a dict")
2666
Eli Bendersky64d11e62012-06-15 07:42:50 +03002667# --------------------------------------------------------------------
2668
class NoAcceleratorTest(unittest.TestCase):
    """Check that pyET really is the pure-Python implementation."""

    def setUp(self):
        # pyET is only filled in while test_main() runs; skip otherwise.
        if not pyET:
            self.skipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        for cls in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(cls.__init__, types.FunctionType)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002681
Florent Xicluna75b5e7e2012-03-05 10:42:19 +01002682# --------------------------------------------------------------------
2683
2684
class CleanContext:
    """Context manager isolating the test run's global ElementTree state.

    On entry it snapshots the namespace map and swaps in a copy of the
    ElementPath cache; on exit both are restored.  It also wraps the run in
    a warnings check for the expected deprecation messages.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        # under -OO, doctests cannot be run and therefore not all warnings
        # will be emitted
        quiet = True if sys.flags.optimize >= 2 else quiet

        # Search behaviour is broken if search path starts with "/".
        broken_search = (
            "This search is broken in 1.3 and earlier, and will be fixed "
            "in a future version. If you rely on the current behaviour, "
            "change it to '.+'")
        # Element.getchildren() and Element.getiterator() are deprecated.
        removed_method = (
            "This method will be removed in future versions. "
            "Use .+ instead.")

        self.checkwarnings = support.check_warnings(
            (broken_search, FutureWarning),
            (removed_method, DeprecationWarning),
            (removed_method, PendingDeprecationWarning),
            quiet=quiet)

    def __enter__(self):
        from xml.etree import ElementPath

        # Keep the live namespace mapping plus a copy of its contents.
        live_nsmap = ET.register_namespace._namespace_map
        self._nsmap = live_nsmap
        self._nsmap_copy = live_nsmap.copy()

        # Remember the path cache (should be empty) and let the tests work
        # on a copy of it.
        original_cache = ElementPath._cache
        self._path_cache = original_cache
        ElementPath._cache = original_cache.copy()

        self.checkwarnings.__enter__()

    def __exit__(self, *args):
        from xml.etree import ElementPath

        # Restore the mapping in place, then put the original cache back.
        live_nsmap = self._nsmap
        live_nsmap.clear()
        live_nsmap.update(self._nsmap_copy)
        ElementPath._cache = self._path_cache

        self.checkwarnings.__exit__(*args)
2723
2724
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    With no *module*, the freshly imported pure-Python implementation
    (pyET) is tested; otherwise *module* is installed as ET for the run.
    Both globals are reset to None afterwards.
    """
    global ET, pyET

    # Always load a pure-Python copy with the C accelerator blocked.
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
    ]

    # NoAcceleratorTest is added only when ET is not pyET itself, i.e. when
    # a module was passed in.  skipUnless can't be used for this, because
    # pyET is filled in only after the module is loaded.
    testing_other_module = pyET is not ET
    if testing_other_module:
        test_classes.append(NoAcceleratorTest)

    try:
        # XXX the C module should give the same warnings as the Python module
        with CleanContext(quiet=testing_other_module):
            support.run_unittest(*test_classes)
    finally:
        # don't interfere with subsequent tests
        ET = pyET = None
2771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002772
# Script entry point: run the suite through test_main() with no module given.
if __name__ == "__main__":
    test_main()