blob: b2492cda848f9e0ead7261938cbbf90c949d048a [file] [log] [blame]
# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
Florent Xiclunaf15351d2010-03-13 23:24:31 +00007
Serhiy Storchakad7a44152015-11-12 11:23:04 +02008import copy
Serhiy Storchaka762ec972017-03-30 18:12:06 +03009import functools
Georg Brandl1f7fffb2010-10-15 15:57:45 +000010import html
Eli Benderskyf996e772012-03-16 05:53:30 +020011import io
Gordon P. Hemsley50fed0b2019-04-28 00:41:43 -040012import itertools
Bernt Røskar Brennaffca16e2019-04-14 10:07:02 +020013import locale
Eli Bendersky698bdb22013-01-10 06:01:06 -080014import operator
Stefan Behnele1d5dd62019-05-01 22:34:13 +020015import os
Eli Bendersky7ec45f72012-12-30 06:17:49 -080016import pickle
Eli Bendersky0192ba32012-03-30 16:38:33 +030017import sys
Stefan Behneldde3eeb2019-05-01 21:49:58 +020018import textwrap
Eli Benderskye26fa1b2013-05-19 17:49:54 -070019import types
Victor Stinner6c6f8512010-08-07 10:09:35 +000020import unittest
Serhiy Storchaka05744ac2015-06-29 22:35:58 +030021import warnings
Eli Benderskya5e82202012-03-31 13:55:38 +030022import weakref
Armin Rigo9ed73062005-12-14 18:10:45 +000023
Stefan Behnele1d5dd62019-05-01 22:34:13 +020024from functools import partial
Stefan Behneldde3eeb2019-05-01 21:49:58 +020025from itertools import product, islice
Benjamin Petersonee8712c2008-05-20 21:35:26 +000026from test import support
Serhiy Storchaka9062c262016-06-12 09:43:55 +030027from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
Armin Rigo9ed73062005-12-14 18:10:45 +000028
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
#
# Both start out unset; they are assigned before the tests run.
pyET = ET = None

# Fixture files, all looked up under the "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

# Skip the whole module when the fixture path cannot be encoded as UTF-8.
try:
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
Florent Xiclunaf15351d2010-03-13 23:24:31 +000043
# In-memory XML fixtures shared by the tests in this module.
# NOTE(review): whitespace inside these literals ends up in the parsed
# text/tail values -- confirm the indentation of each literal against the
# upstream file before relying on exact whitespace.

# Un-namespaced document: two <tag> children of <body> plus one <tag>
# nested inside <section>.
SAMPLE_XML = """\
<body>
 <tag class='a'>text</tag>
 <tag class='b' />
 <section>
 <tag class='b' id='inner'>subtext</tag>
 </section>
</body>
"""

# Stand-alone <section> fragment with further nested elements.
SAMPLE_SECTION = """\
<section>
 <tag class='b' id='inner'>subtext</tag>
 <nexttag />
 <nextsection>
 <tag />
 </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but with a default namespace on <body>.
# Note the literal starts with a newline (no line continuation).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
 <tag>text</tag>
 <tag />
 <section>
 <tag>subtext</tag>
 </section>
</body>
"""

# Two sibling tables, each declaring its own prefixed namespace ("h", "f").
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
 <h:tr>
 <h:td>Apples</h:td>
 <h:td>Bananas</h:td>
 </h:tr>
</h:table>

<f:table xmlns:f="foo">
 <f:name>African Coffee Table</f:name>
 <f:width>80</f:width>
 <f:length>120</f:length>
</f:table>
</root>
"""

# DOCTYPE pulls in an external parameter entity; the body references
# &entity;, which the internal subset itself never declares.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# DOCTYPE declares "entity" as a SYSTEM entity pointing at a file:// URL.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""
Armin Rigo9ed73062005-12-14 18:10:45 +0000105
def checkwarnings(*filters, quiet=False):
    """Return a decorator that runs a callable under support.check_warnings.

    *filters* and *quiet* are forwarded unchanged to
    ``support.check_warnings``.  The decorated callable receives exactly
    the positional and keyword arguments given to the wrapper, and its
    return value is passed back to the caller.
    """
    def decorator(test):
        # functools.wraps copies the wrapped callable's metadata
        # (name, docstring, ...) onto the wrapper.
        @functools.wraps(test)
        def newtest(*args, **kwargs):
            with support.check_warnings(*filters, quiet=quiet):
                # Propagate the result instead of silently dropping it.
                return test(*args, **kwargs)
        return newtest
    return decorator
114
115
class ModuleTest(unittest.TestCase):
    # Module-level checks: importability and __all__ contents.

    def test_sanity(self):
        # Import sanity: the imports are the test, the names are unused.
        from xml.etree import ElementTree, ElementInclude, ElementPath

    def test_all(self):
        # Delegate the __all__ verification of ET to the support helper.
        module_names = ("xml.etree.ElementTree", "_elementtree")
        excluded = ("HTML_EMPTY",)
        support.check__all__(self, ET, module_names, blacklist=excluded)
127
Armin Rigo9ed73062005-12-14 18:10:45 +0000128
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* via ``ET.ElementTree.write`` into an in-memory buffer.

    A text buffer is used when *encoding* is ``'unicode'`` and a bytes
    buffer otherwise.  Extra keyword *options* are passed on to
    ``write()``.  Returns the buffer contents when *to_string* is true;
    otherwise returns the buffer object rewound to its start.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
Armin Rigo9ed73062005-12-14 18:10:45 +0000141
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of every item in *seq*."""
    tags = []
    for item in seq:
        tags.append(item.tag)
    return tags
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000144
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000145
class ElementTestCase:
    # Helpers for pickling elements and comparing them structurally.
    # The methods call unittest assertion methods on self, so this is
    # presumably combined with unittest.TestCase by its subclasses.

    @classmethod
    def setUpClass(cls):
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        # Pickle obj while `dumper` is installed as sys.modules[name],
        # unpickle while `loader` is installed there, and always put the
        # original sys.modules entry back afterwards.
        original_module = sys.modules[name]
        try:
            sys.modules[name] = dumper
            payload = pickle.dumps(obj, proto)
            sys.modules[name] = loader
            restored = pickle.loads(payload)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            message = "Failed to round-trip %r from %r to %r" % (
                obj,
                labels.get(dumper, dumper),
                labels.get(loader, loader),
            )
            raise support.TestFailed(message) from pe
        finally:
            sys.modules[name] = original_module
        return restored

    def assertEqualElements(self, alice, bob):
        # Recursively compare two elements: same child count, equal
        # children pairwise, and equal tag/tail/text/attrib.
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        fields = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(fields(alice), fields(bob))
177
# --------------------------------------------------------------------
# element tree tests
Armin Rigo9ed73062005-12-14 18:10:45 +0000180
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200181class ElementTreeTest(unittest.TestCase):
Armin Rigo9ed73062005-12-14 18:10:45 +0000182
def serialize_check(self, elem, expected):
    # Assert that serialize(elem), with default options, equals expected.
    actual = serialize(elem)
    self.assertEqual(actual, expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000185
def test_interface(self):
    # Test element tree interface.

    def check_string(string):
        # Exercise len(), iteration, concatenation and slicing.
        len(string)
        for char in string:
            self.assertEqual(len(char), 1,
                             msg="expected one-character string, got %r" % char)
        string + ""
        string + " "
        string[:0]

    def check_mapping(mapping):
        # Exercise len(), keys(), items(), lookup and item assignment.
        len(mapping)
        keys = mapping.keys()
        mapping.items()
        for key in keys:
            mapping[key]
        mapping["key"] = "value"
        self.assertEqual(mapping["key"], "value",
                         msg="expected value string, got %r" % mapping["key"])

    def check_element(element):
        # Validate one element, then recurse into its children.
        self.assertTrue(ET.iselement(element), msg="not an element")
        visible = dir(element)
        for attr in ('tag', 'attrib', 'text', 'tail'):
            self.assertTrue(hasattr(element, attr),
                            msg='no %s member' % attr)
            self.assertIn(attr, visible,
                          msg='no %s visible by dir' % attr)

        check_string(element.tag)
        check_mapping(element.attrib)
        if element.text is not None:
            check_string(element.text)
        if element.tail is not None:
            check_string(element.tail)
        for child in element:
            check_element(child)

    element = ET.Element("tag")
    check_element(element)
    tree = ET.ElementTree(element)
    check_element(tree.getroot())
    element = ET.Element("t\xe4g", key="value")
    tree = ET.ElementTree(element)
    self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
    element = ET.Element("tag", key="value")

    # Make sure all standard element methods exist.

    def check_method(method):
        self.assertTrue(hasattr(method, '__call__'),
                        msg="%s not callable" % method)

    method_names = (
        'append', 'extend', 'insert', 'remove', 'getchildren',
        'find', 'iterfind', 'findall', 'findtext', 'clear',
        'get', 'set', 'keys', 'items', 'iter', 'itertext',
        'getiterator',
    )
    for method_name in method_names:
        check_method(getattr(element, method_name))

    # These methods return an iterable. See bug 6472.

    def check_iter(it):
        check_method(it.__next__)

    for owner in (element, tree):
        for path in ("tag", "*"):
            check_iter(owner.iterfind(path))

    # These aliases are provided:

    self.assertEqual(ET.XML, ET.fromstring)
    self.assertEqual(ET.PI, ET.ProcessingInstruction)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000273
def test_set_attribute(self):
    # Each of tag, text, tail and attrib can be reassigned repeatedly
    # and reads back the value that was last stored.
    element = ET.Element('tag')

    self.assertEqual(element.tag, 'tag')
    for new_tag in ('Tag', 'TAG'):
        element.tag = new_tag
        self.assertEqual(element.tag, new_tag)

    self.assertIsNone(element.text)
    for new_text in ('Text', 'TEXT'):
        element.text = new_text
        self.assertEqual(element.text, new_text)

    self.assertIsNone(element.tail)
    for new_tail in ('Tail', 'TAIL'):
        element.tail = new_tail
        self.assertEqual(element.tail, new_tail)

    self.assertEqual(element.attrib, {})
    for new_attrib in ({'a': 'b', 'c': 'd'}, {'A': 'B', 'C': 'D'}):
        # Store a copy so the comparison is against a distinct dict.
        element.attrib = dict(new_attrib)
        self.assertEqual(element.attrib, new_attrib)
300
def test_simpleops(self):
    # Basic method sanity checks.
    check = self.serialize_check

    body = ET.XML("<body><tag/></body>")
    check(body, '<body><tag /></body>')
    extra = ET.Element("tag2")
    body.append(extra)
    check(body, '<body><tag /><tag2 /></body>')
    body.remove(extra)
    check(body, '<body><tag /></body>')
    body.insert(0, extra)
    check(body, '<body><tag2 /><tag /></body>')
    body.remove(extra)
    body.extend([extra])
    check(body, '<body><tag /><tag2 /></body>')
    body.remove(extra)

    empty = '<tag key="value" />'
    one_child = '<tag key="value"><subtag /></tag>'
    two_children = '<tag key="value"><subtag /><subtag /></tag>'

    parent = ET.Element("tag", key="value")
    check(parent, empty)  # 1
    child = ET.Element("subtag")
    parent.append(child)
    check(parent, one_child)  # 2
    parent.insert(0, child)
    check(parent, two_children)  # 3
    parent.remove(child)
    check(parent, one_child)  # 4
    parent.remove(child)
    check(parent, empty)  # 5
    with self.assertRaises(ValueError) as cm:
        parent.remove(child)
    self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
    check(parent, empty)  # 6

    # Slice assignment, slice reads (including out-of-range stops and a
    # step) and slice deletion.
    parent[0:0] = [child] * 3
    check(parent[1], '<subtag />')
    self.assertEqual(parent[1:9], [parent[1], parent[2]])
    self.assertEqual(parent[:9:2], [parent[0], parent[2]])
    del parent[1:2]
    check(parent, two_children)
Florent Xiclunaa72a98f2012-02-13 11:03:30 +0100341
def test_cdata(self):
    # Test CDATA handling (etc).
    # Plain text, character references and a CDATA section must all
    # serialize to the same plain-text element.
    sources = (
        "<tag>hello</tag>",
        "<tag>&#104;&#101;&#108;&#108;&#111;</tag>",
        "<tag><![CDATA[hello]]></tag>",
    )
    for source in sources:
        self.serialize_check(ET.XML(source), '<tag>hello</tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000351
def test_file_init(self):
    # ElementTree(file=...) accepts both a binary file object and a
    # filename.
    from_stream = ET.ElementTree(file=io.BytesIO(SAMPLE_XML.encode("utf-8")))
    for path in ("tag", "section/tag"):
        self.assertEqual(from_stream.find(path).tag, 'tag')

    from_filename = ET.ElementTree(file=SIMPLE_XMLFILE)
    lookups = (
        ("element", 'element'),
        ("element/../empty-element", 'empty-element'),
    )
    for path, expected_tag in lookups:
        self.assertEqual(from_filename.find(path).tag, expected_tag)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000362
def test_path_cache(self):
    # Check that the path cache behaves sanely.

    from xml.etree import ElementPath

    root = ET.XML(SAMPLE_XML)

    def search(count):
        # Issue `count` distinct path lookups: './0', './1', ...
        for index in range(count):
            ET.ElementTree(root).find('./' + str(index))

    def cache_size():
        return len(ElementPath._cache)

    search(10)
    baseline = cache_size()
    # Repeating the same lookups must not add entries.
    search(10)
    self.assertEqual(cache_size(), baseline)
    # New paths do add entries.
    search(20)
    self.assertGreater(cache_size(), baseline)
    # The cache stays below 500 entries after 600 distinct paths.
    search(600)
    self.assertLess(cache_size(), 500)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000377
def test_copy(self):
    # Test copy handling (etc).
    # Uses the module-level `copy` import.
    original = ET.XML("<tag>hello<foo/></tag>")
    shallow = copy.copy(original)
    deep = copy.deepcopy(original)

    # Rename the child through the original element.
    original.find("foo").tag = "bar"

    renamed = '<tag>hello<bar /></tag>'
    self.serialize_check(original, renamed)
    # The shallow copy shares the child, so it sees the rename ...
    self.serialize_check(shallow, renamed)
    # ... while the deep copy keeps its own, untouched child.
    self.serialize_check(deep, '<tag>hello<foo /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000389
def test_attrib(self):
    # Test attribute handling.

    elem = ET.Element("tag")
    # A missing key with no explicit default must yield None; assert it
    # rather than only calling get() and discarding the result.
    self.assertIsNone(elem.get("key"))  # 1.1
    self.assertEqual(elem.get("key", "default"), 'default')  # 1.2

    elem.set("key", "value")
    self.assertEqual(elem.get("key"), 'value')  # 1.3

    # Attributes given as keyword arguments.
    elem = ET.Element("tag", key="value")
    self.assertEqual(elem.get("key"), 'value')  # 2.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 2.2

    # Attributes given as a positional dict.
    attrib = {"key": "value"}
    elem = ET.Element("tag", attrib)
    attrib.clear()  # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value')  # 3.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 3.2

    # Attributes given by unpacking a dict into keywords.
    attrib = {"key": "value"}
    elem = ET.Element("tag", **attrib)
    attrib.clear()  # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value')  # 4.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 4.2

    # A keyword argument overrides the same key in the positional dict.
    elem = ET.Element("tag", {"key": "other"}, key="value")
    self.assertEqual(elem.get("key"), 'value')  # 5.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 5.2

    elem = ET.Element('test')
    elem.text = "aa"
    elem.set('testa', 'testval')
    elem.set('testb', 'test2')
    self.assertEqual(ET.tostring(elem),
                     b'<test testa="testval" testb="test2">aa</test>')
    self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
    self.assertEqual(sorted(elem.items()),
                     [('testa', 'testval'), ('testb', 'test2')])
    self.assertEqual(elem.attrib['testb'], 'test2')
    elem.attrib['testb'] = 'test1'
    elem.attrib['testc'] = 'test2'
    self.assertEqual(
        ET.tostring(elem),
        b'<test testa="testval" testb="test1" testc="test2">aa</test>')

    # Whitespace characters in attribute values are written as
    # character references.
    elem = ET.Element('test')
    elem.set('a', '\r')
    elem.set('b', '\r\n')
    elem.set('c', '\t\n\r ')
    elem.set('d', '\n\n')
    self.assertEqual(
        ET.tostring(elem),
        b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
442
def test_makeelement(self):
    # Test makeelement handling.

    def expected(child_count):
        # Serialized <tag> holding `child_count` copies of the child.
        return '<tag>' + '<subtag key="value" />' * child_count + '</tag>'

    parent = ET.Element("tag")
    attrs = {"key": "value"}
    child = parent.makeelement("subtag", attrs)
    self.assertIsNot(child.attrib, attrs, msg="attrib aliasing")
    parent.append(child)
    self.serialize_check(parent, expected(1))

    parent.clear()
    self.serialize_check(parent, '<tag />')
    parent.append(child)
    self.serialize_check(parent, expected(1))
    parent.extend([child, child])
    self.serialize_check(parent, expected(3))
    # Full-slice assignment accepts a list as well as a tuple.
    parent[:] = [child]
    self.serialize_check(parent, expected(1))
    parent[:] = (child,)
    self.serialize_check(parent, expected(1))
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000464
def test_parsefile(self):
    # Test parsing from file.

    # NOTE(review): the indentation inside these expected strings has to
    # match the fixture files byte-for-byte -- confirm against
    # simple.xml / simple-ns.xml.
    simple_expected = (
        '<root>\n'
        ' <element key="value">text</element>\n'
        ' <element>text</element>tail\n'
        ' <empty-element />\n'
        '</root>')
    simple_ns_expected = (
        '<ns0:root xmlns:ns0="namespace">\n'
        ' <ns0:element key="value">text</ns0:element>\n'
        ' <ns0:element>text</ns0:element>tail\n'
        ' <ns0:empty-element />\n'
        '</ns0:root>')

    # ET.parse() followed by ElementTree.write() to a text stream.
    tree = ET.parse(SIMPLE_XMLFILE)
    stream = io.StringIO()
    tree.write(stream, encoding='unicode')
    self.assertEqual(stream.getvalue(), simple_expected)

    tree = ET.parse(SIMPLE_NS_XMLFILE)
    stream = io.StringIO()
    tree.write(stream, encoding='unicode')
    self.assertEqual(stream.getvalue(), simple_ns_expected)

    # Decode explicitly as UTF-8 so the test does not depend on the
    # locale's default encoding.
    with open(SIMPLE_XMLFILE, encoding="utf-8") as f:
        data = f.read()

    # Feeding the text to a default XMLParser.
    parser = ET.XMLParser()
    self.assertRegex(parser.version, r'^Expat ')
    parser.feed(data)
    self.serialize_check(parser.close(), simple_expected)

    # Same, with an explicit TreeBuilder as the parser target.
    target = ET.TreeBuilder()
    parser = ET.XMLParser(target=target)
    parser.feed(data)
    self.serialize_check(parser.close(), simple_expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000509
def test_parseliteral(self):
    # Parsing from string literals and serializing back.
    markup = "<html><body>text</body></html>"
    markup_bytes = b'<html><body>text</body></html>'

    # ET.XML and ET.fromstring both parse a complete document string.
    for parse in (ET.XML, ET.fromstring):
        element = parse(markup)
        self.assertEqual(ET.tostring(element, encoding='unicode'),
                         '<html><body>text</body></html>')

    # fromstringlist accepts the document split at arbitrary points.
    fragments = ["<html><body>", "text</bo", "dy></html>"]
    element = ET.fromstringlist(fragments)
    self.assertEqual(ET.tostring(element), markup_bytes)
    self.assertEqual(b"".join(ET.tostringlist(element)), markup_bytes)
    self.assertEqual(ET.tostring(element, "ascii"),
                     b"<?xml version='1.0' encoding='ascii'?>\n"
                     b"<html><body>text</body></html>")

    # XMLID returns (element, id-map); only the map is checked here.
    _, ids = ET.XMLID("<html><body>text</body></html>")
    self.assertEqual(len(ids), 0)
    _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    self.assertEqual(len(ids), 1)
    self.assertEqual(ids["body"].tag, 'body')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000531
def test_iterparse(self):
    # Test iterparse interface.

    iterparse = ET.iterparse

    def tagged(event_iter):
        # Drain an iterparse iterator into (action, tag) pairs.
        return [(action, elem.tag) for action, elem in event_iter]

    def describe(action, payload):
        # Element events are reported by tag, namespace events as-is.
        if action in ("start", "end"):
            return (action, payload.tag)
        return (action, payload)

    # Default events: only 'end'.
    context = iterparse(SIMPLE_XMLFILE)
    action, elem = next(context)
    self.assertEqual((action, elem.tag), ('end', 'element'))
    self.assertEqual(tagged(context), [
        ('end', 'element'),
        ('end', 'empty-element'),
        ('end', 'root'),
    ])
    self.assertEqual(context.root.tag, 'root')

    context = iterparse(SIMPLE_NS_XMLFILE)
    self.assertEqual(tagged(context), [
        ('end', '{namespace}element'),
        ('end', '{namespace}element'),
        ('end', '{namespace}empty-element'),
        ('end', '{namespace}root'),
    ])

    # An empty events tuple yields nothing, positionally or by keyword.
    events = ()
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual(tagged(context), [])

    events = ()
    context = iterparse(SIMPLE_XMLFILE, events=events)
    self.assertEqual(tagged(context), [])

    events = ("start", "end")
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual(tagged(context), [
        ('start', 'root'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'empty-element'),
        ('end', 'empty-element'),
        ('end', 'root'),
    ])

    events = ("start", "end", "start-ns", "end-ns")
    context = iterparse(SIMPLE_NS_XMLFILE, events)
    self.assertEqual([describe(action, elem) for action, elem in context], [
        ('start-ns', ('', 'namespace')),
        ('start', '{namespace}root'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}empty-element'),
        ('end', '{namespace}empty-element'),
        ('end', '{namespace}root'),
        ('end-ns', None),
    ])

    events = ('start-ns', 'end-ns')
    context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
    actions = [action for action, elem in context]
    self.assertEqual(actions, ['start-ns', 'end-ns'])

    # An unknown event name is rejected; a caller-supplied file object
    # must be left open.
    events = ("start", "end", "bogus")
    with open(SIMPLE_XMLFILE, "rb") as f:
        with self.assertRaises(ValueError) as cm:
            iterparse(f, events)
        self.assertFalse(f.closed)
    self.assertEqual(str(cm.exception), "unknown event 'bogus'")

    # Same failure when iterparse is given a filename; no resource
    # warning may be emitted.
    with support.check_no_resource_warning(self):
        with self.assertRaises(ValueError) as cm:
            iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")
        del cm

    # Non-ASCII namespace prefix and URI in an iso-8859-1 document.
    source = io.BytesIO(
        b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
        b"<body xmlns='http://&#233;ffbot.org/ns'\n"
        b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
    events = ("start-ns",)
    context = iterparse(source, events)
    self.assertEqual([(action, elem) for action, elem in context], [
        ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
        ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
    ])

    # Trailing junk: the element is delivered first, then ParseError.
    junk_message = 'junk after document element: line 1, column 12'

    source = io.StringIO("<document />junk")
    it = iterparse(source)
    action, elem = next(it)
    self.assertEqual((action, elem.tag), ('end', 'document'))
    with self.assertRaises(ET.ParseError) as cm:
        next(it)
    self.assertEqual(str(cm.exception), junk_message)

    # Same document read from a real file, checked for resource warnings.
    self.addCleanup(support.unlink, TESTFN)
    with open(TESTFN, "wb") as f:
        f.write(b"<document />junk")
    it = iterparse(TESTFN)
    action, elem = next(it)
    self.assertEqual((action, elem.tag), ('end', 'document'))
    with support.check_no_resource_warning(self):
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception), junk_message)
        del cm, it
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200643
def test_writefile(self):
    # Serialization of text, sub-elements, comments and PIs.
    check = self.serialize_check

    root = ET.Element("tag")
    root.text = "text"
    check(root, '<tag>text</tag>')
    ET.SubElement(root, "subtag").text = "subtext"
    check(root, '<tag>text<subtag>subtext</subtag></tag>')

    # Test tag suppression
    root.tag = None
    check(root, 'text<subtag>subtext</subtag>')
    root.insert(0, ET.Comment("comment"))
    check(root, 'text<!--comment--><subtag>subtext</subtag>')  # assumes 1.3

    # Replace the comment with a processing instruction.
    root[0] = ET.PI("key", "value")
    check(root, 'text<?key value?><subtag>subtext</subtag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000660
def test_custom_builder(self):
    # Test parser w. custom builder.

    # Decode explicitly as UTF-8 so the test does not depend on the
    # locale's default encoding.
    with open(SIMPLE_XMLFILE, encoding="utf-8") as f:
        data = f.read()

    # Minimal target: only start/end/data handlers.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            pass

    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
        ('start', 'root'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'empty-element'),
        ('end', 'empty-element'),
        ('end', 'root'),
    ])

    with open(SIMPLE_NS_XMLFILE, encoding="utf-8") as f:
        data = f.read()

    # Full target: additionally records processing instructions,
    # comments and namespace start/end events.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            pass
        def pi(self, target, data):
            self.append(("pi", target, data))
        def comment(self, data):
            self.append(("comment", data))
        def start_ns(self, prefix, uri):
            self.append(("start-ns", prefix, uri))
        def end_ns(self, prefix):
            self.append(("end-ns", prefix))

    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
        ('pi', 'pi', 'data'),
        ('comment', ' comment '),
        ('start-ns', '', 'namespace'),
        ('start', '{namespace}root'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}empty-element'),
        ('end', '{namespace}empty-element'),
        ('end', '{namespace}root'),
        ('end-ns', ''),
    ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000721
def test_custom_builder_only_end_ns(self):
    # A target that defines nothing but end_ns() is fed a document with
    # three namespace declarations; only the end-ns callbacks are recorded.
    class EndNsRecorder(list):
        def end_ns(self, prefix):
            self.append(("end-ns", prefix))

    document = textwrap.dedent("""\
        <?pi data?>
        <!-- comment -->
        <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
        <a:element key='value'>text</a:element>
        <p:element>text</p:element>tail
        <empty-element/>
        </root>
        """)
    recorder = EndNsRecorder()
    parser = ET.XMLParser(target=recorder)
    parser.feed(document)
    expected = [
        ('end-ns', 'a'),
        ('end-ns', 'p'),
        ('end-ns', ''),
    ]
    self.assertEqual(recorder, expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000743
# Element.getchildren() and ElementTree.getiterator() are deprecated.
@checkwarnings(("This method will be removed in future versions. "
                "Use .+ instead.",
                DeprecationWarning))
def test_getchildren(self):
    # Test Element.getchildren()
    #
    # NOTE(review): the pattern in the decorator has to match the emitted
    # DeprecationWarning text, spacing included -- confirm against the
    # ElementTree implementation.

    with open(SIMPLE_XMLFILE, "rb") as f:
        tree = ET.parse(f)
    # Expected per-element child summaries: the root lists its three
    # children, each of which has none.
    expected_children = [
        ['element', 'element', 'empty-element'],
        [],
        [],
        [],
    ]
    via_iter = [summarize_list(node.getchildren())
                for node in tree.getroot().iter()]
    self.assertEqual(via_iter, expected_children)
    via_getiterator = [summarize_list(node.getchildren())
                       for node in tree.getiterator()]
    self.assertEqual(via_getiterator, expected_children)

    root = ET.XML(SAMPLE_XML)
    self.assertEqual(len(root.getchildren()), 3)
    self.assertEqual(len(root[2].getchildren()), 1)
    self.assertEqual(root[:], root.getchildren())
    first = root[0]
    last = root[2]
    # Drop the middle child, then check the survivors kept their order.
    del root[1:2]
    self.assertEqual(len(root.getchildren()), 2)
    self.assertEqual(first, root[0])
    self.assertEqual(last, root[1])
    # Swap the two remaining children through slice assignment.
    root[0:2] = [last, first]
    self.assertEqual(last, root[0])
    self.assertEqual(first, root[1])
    self.assertNotEqual(first, root[0])
    root.clear()
    self.assertEqual(root.getchildren(), [])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000784
def test_writestring(self):
    # Trees built by ET.XML() and by ET.fromstring() from the same markup
    # must both serialize back to the same bytes.
    markup = "<html><body>text</body></html>"
    for parse in (ET.XML, ET.fromstring):
        tree = parse(markup)
        self.assertEqual(ET.tostring(tree), b'<html><body>text</body></html>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000790
def test_tostring_default_namespace(self):
    # Serialize the same tree without and with default_namespace.
    body = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')

    prefixed = ET.tostring(body, encoding='unicode')
    self.assertEqual(
        prefixed,
        '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
    )

    unprefixed = ET.tostring(body, encoding='unicode',
                             default_namespace='http://effbot.org/ns')
    self.assertEqual(
        unprefixed,
        '<body xmlns="http://effbot.org/ns"><tag /></body>'
    )
801
def test_tostring_default_namespace_different_namespace(self):
    # default_namespace differs from the namespace the elements live in.
    body = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
    serialized = ET.tostring(body, encoding='unicode',
                             default_namespace='foobar')
    self.assertEqual(
        serialized,
        '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
    )
808
def test_tostring_default_namespace_original_no_namespace(self):
    # Unqualified element names combined with default_namespace are
    # expected to be rejected with a ValueError.
    body = ET.XML('<body><tag/></body>')
    message = '^cannot use non-qualified names with default_namespace option$'
    with self.assertRaisesRegex(ValueError, message):
        ET.tostring(body, encoding='unicode', default_namespace='foobar')
814
def test_tostring_no_xml_declaration(self):
    # Without xml_declaration, the 'unicode' output carries no declaration.
    body = ET.XML('<body><tag/></body>')
    serialized = ET.tostring(body, encoding='unicode')
    self.assertEqual(serialized, '<body><tag /></body>')
821
def test_tostring_xml_declaration(self):
    # xml_declaration=True prepends a declaration naming the encoding
    # exactly as it was passed in.
    body = ET.XML('<body><tag/></body>')
    serialized = ET.tostring(body, encoding='utf8', xml_declaration=True)
    self.assertEqual(
        serialized,
        b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
    )
828
def test_tostring_xml_declaration_unicode_encoding(self):
    # With encoding='unicode' the declaration is expected to name the
    # locale's preferred encoding.
    body = ET.XML('<body><tag/></body>')
    declared = locale.getpreferredencoding()
    expected = f"<?xml version='1.0' encoding='{declared}'?>\n<body><tag /></body>"
    serialized = ET.tostring(body, encoding='unicode', xml_declaration=True)
    self.assertEqual(expected, serialized)
836
def test_tostring_xml_declaration_cases(self):
    # Run tostring() over every (encoding, xml_declaration) combination
    # listed below; each combination is reported as its own subtest.
    elem = ET.XML('<body><tag>ø</tag></body>')
    preferredencoding = locale.getpreferredencoding()
    # Rows are (expected_retval, encoding, xml_declaration).
    cases = [
        # ... xml_declaration = None
        (b'<body><tag>&#248;</tag></body>', None, None),
        (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
        (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
        (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
         b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
        ('<body><tag>ø</tag></body>', 'unicode', None),

        # ... xml_declaration = False
        (b"<body><tag>&#248;</tag></body>", None, False),
        (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
        (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
        (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
        ("<body><tag>ø</tag></body>", 'unicode', False),

        # ... xml_declaration = True
        (b"<?xml version='1.0' encoding='us-ascii'?>\n"
         b"<body><tag>&#248;</tag></body>", None, True),
        (b"<?xml version='1.0' encoding='UTF-8'?>\n"
         b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
        (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
         b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
        (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
         b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
        (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
         "<body><tag>ø</tag></body>", 'unicode', True),
    ]
    for expected, encoding, xml_declaration in cases:
        label = (f'encoding={encoding} '
                 f'xml_declaration={xml_declaration}')
        with self.subTest(label):
            actual = ET.tostring(elem, encoding=encoding,
                                 xml_declaration=xml_declaration)
            self.assertEqual(actual, expected)
881
def test_tostringlist_default_namespace(self):
    # Same checks as for tostring(), applied to the joined tostringlist()
    # fragments.
    body = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')

    prefixed = ''.join(ET.tostringlist(body, encoding='unicode'))
    self.assertEqual(
        prefixed,
        '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
    )

    fragments = ET.tostringlist(body, encoding='unicode',
                                default_namespace='http://effbot.org/ns')
    self.assertEqual(
        ''.join(fragments),
        '<body xmlns="http://effbot.org/ns"><tag /></body>'
    )
892
def test_tostringlist_xml_declaration(self):
    # Check tostringlist() output without a declaration, with the default
    # (bytes) declaration, and with a 'unicode' declaration.
    elem = ET.XML('<body><tag/></body>')
    self.assertEqual(
        ''.join(ET.tostringlist(elem, encoding='unicode')),
        '<body><tag /></body>'
    )
    self.assertEqual(
        b''.join(ET.tostringlist(elem, xml_declaration=True)),
        b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
    )

    preferredencoding = locale.getpreferredencoding()
    stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
    self.assertEqual(
        ''.join(stringlist),
        f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
    )
    # '.' and '?' are regex metacharacters.  Left unescaped, "'?>" means
    # "an optional quote followed by '>'", so the literal "?>" that closes
    # the declaration would never actually be checked.
    self.assertRegex(stringlist[0], r"^<\?xml version='1\.0' encoding='.+'\?>")
    # The declaration is the first fragment; the remaining fragments are
    # the pieces of the serialized tree.
    self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
912
def test_encoding(self):
    # Test parsing of documents that declare various encodings.

    def check(encoding, body=''):
        # Parse the same document both as encoded bytes and as str and
        # compare the root text with *body*.
        doc = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
               (encoding, body))
        self.assertEqual(ET.XML(doc.encode(encoding)).text, body)
        self.assertEqual(ET.XML(doc).text, body)
    check("ascii", 'a')
    check("us-ascii", 'a')
    check("iso-8859-1", '\xbd')
    check("iso-8859-15", '\u20ac')
    check("cp437", '\u221a')
    check("mac-roman", '\u02da')

    def xml(encoding):
        return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    def bxml(encoding):
        return xml(encoding).encode(encoding)

    # Each encoding below runs in its own subtest so that a failure names
    # the offending encoding and does not hide the remaining ones.
    supported_encodings = [
        'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
        'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
        'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
        'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
        'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
        'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
        'mac-roman', 'mac-turkish',
        'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
        'iso2022-jp-3', 'iso2022-jp-ext',
        'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
        'hz', 'ptcp154',
    ]
    for encoding in supported_encodings:
        with self.subTest(encoding=encoding):
            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')

    # These are expected to be rejected with ValueError ...
    unsupported_ascii_compatible_encodings = [
        'big5', 'big5hkscs',
        'cp932', 'cp949', 'cp950',
        'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
        'gb2312', 'gbk', 'gb18030',
        'iso2022-kr', 'johab',
        'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
        'utf-7',
    ]
    for encoding in unsupported_ascii_compatible_encodings:
        with self.subTest(encoding=encoding):
            self.assertRaises(ValueError, ET.XML, bxml(encoding))

    # ... while these are expected to fail with a ParseError.
    unsupported_ascii_incompatible_encodings = [
        'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
        'utf_32', 'utf_32_be', 'utf_32_le',
    ]
    for encoding in unsupported_ascii_incompatible_encodings:
        with self.subTest(encoding=encoding):
            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))

    self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
    self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
971
def test_methods(self):
    # Test serialization methods.

    root = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    root.tail = "\n"
    as_xml = '<html><link /><script>1 &lt; 2</script></html>\n'
    # Omitting the method, passing None and passing "xml" all yield the
    # same XML form.
    self.assertEqual(serialize(root), as_xml)
    expectations = (
        (None, as_xml),
        ("xml", as_xml),
        ("html", '<html><link><script>1 < 2</script></html>\n'),
        ("text", '1 < 2\n'),
    )
    for method, expected in expectations:
        self.assertEqual(serialize(root, method=method), expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000986
def test_issue18347(self):
    # The mixed-case tag name must come out unchanged from both the
    # default and the HTML serializer.
    root = ET.XML('<html><CamelCase>text</CamelCase></html>')
    expected = '<html><CamelCase>text</CamelCase></html>'
    self.assertEqual(serialize(root), expected)
    self.assertEqual(serialize(root, method="html"), expected)
993
def test_entity(self):
    # Test entity handling.

    def parse_failure(document):
        # Return the message of the ParseError raised for *document*.
        with self.assertRaises(ET.ParseError) as caught:
            ET.XML(document)
        return str(caught.exception)

    # 1) good entities

    doc = ET.XML("<document title='&#x8230;'>test</document>")
    self.assertEqual(serialize(doc, encoding="us-ascii"),
                     b'<document title="&#33328;">test</document>')
    self.serialize_check(doc, '<document title="\u8230">test</document>')

    # 2) bad entities

    self.assertEqual(parse_failure("<document>&entity;</document>"),
                     'undefined entity: line 1, column 10')

    self.assertEqual(parse_failure(ENTITY_XML),
                     'undefined entity &entity;: line 5, column 10')

    # 3) custom entity

    parser = ET.XMLParser()
    parser.entity["entity"] = "text"
    parser.feed(ENTITY_XML)
    self.serialize_check(parser.close(), '<document>text</document>')

    # 4) external (SYSTEM) entity

    self.assertEqual(parse_failure(EXTERNAL_ENTITY_XML),
                     'undefined entity &entity;: line 4, column 10')
1030
def test_namespace(self):
    # Test namespace issues.

    # 1) xml namespace

    self.serialize_check(ET.XML("<tag xml:lang='en' />"),
                         '<tag xml:lang="en" />') # 1.1

    # 2) other "well-known" namespaces
    #    (in case 2.3 the expected output uses a generated ns0 prefix)

    well_known = (
        ("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />",
         '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'), # 2.1
        ("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />",
         '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'), # 2.2
        ("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />",
         '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'), # 2.3
    )
    for source, expected in well_known:
        self.serialize_check(ET.XML(source), expected)

    # 3) unknown namespaces
    expected_lines = (
        '<ns0:body xmlns:ns0="http://effbot.org/ns">\n',
        ' <ns0:tag>text</ns0:tag>\n',
        ' <ns0:tag />\n',
        ' <ns0:section>\n',
        ' <ns0:tag>subtext</ns0:tag>\n',
        ' </ns0:section>\n',
        '</ns0:body>',
    )
    self.serialize_check(ET.XML(SAMPLE_XML_NS), ''.join(expected_lines))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001063
def test_qname(self):
    # Test QName handling.

    # 1) decorated tags

    elem = ET.Element("{uri}tag")
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
    elem = ET.Element(ET.QName("{uri}tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
    elem = ET.Element(ET.QName("uri", "tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
    elem = ET.Element(ET.QName("uri", "tag"))
    # The SubElement() results are not needed: the expected output below
    # shows both children attached to elem.
    ET.SubElement(elem, ET.QName("uri", "tag1"))
    ET.SubElement(elem, ET.QName("uri", "tag2"))
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4

    # 2) decorated attributes

    elem.clear()
    elem.attrib["{uri}key"] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1

    elem.clear()
    elem.attrib[ET.QName("{uri}key")] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2

    # 3) decorated values are not converted by default, but the
    # QName wrapper can be used for values

    elem.clear()
    elem.attrib["{uri}key"] = "{uri}value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1

    elem.clear()
    elem.attrib["{uri}key"] = ET.QName("{uri}value")
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2

    elem.clear()
    subelem = ET.Element("tag")
    subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    # The same element object is appended twice on purpose; the expected
    # output contains it twice.
    elem.append(subelem)
    elem.append(subelem)
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
        '<tag ns1:key="ns2:value" />'
        '<tag ns1:key="ns2:value" />'
        '</ns0:tag>') # 3.3

    # 4) Direct QName tests

    self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
    self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
    q1 = ET.QName('ns', 'tag')
    q2 = ET.QName('ns', 'tag')
    self.assertEqual(q1, q2)
    q2 = ET.QName('ns', 'other-tag')
    self.assertNotEqual(q1, q2)
    # A QName compares equal to its '{ns}tag' string form, not to 'ns:tag'.
    self.assertNotEqual(q1, 'ns:tag')
    self.assertEqual(q1, '{ns}tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128
def test_doctype_public(self):
    # Test PUBLIC doctype.

    # The test only requires that parsing succeeds; the resulting element
    # is not inspected.
    doctype = ('<!DOCTYPE html PUBLIC'
               ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">')
    elem = ET.XML(doctype + '<html>text</html>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136
def test_xpath_tokenizer(self):
    # Test the XPath tokenizer.
    from xml.etree import ElementPath

    def check(p, expected, namespaces=None):
        # Every token is an (op, tag) pair; keep whichever part is truthy.
        tokens = []
        for op, tag in ElementPath.xpath_tokenizer(p, namespaces):
            tokens.append(op or tag)
        self.assertEqual(tokens, expected)

    # tests from the xml specification
    specification_cases = [
        ("*", ['*']),
        ("text()", ['text', '()']),
        ("@name", ['@', 'name']),
        ("@*", ['@', '*']),
        ("para[1]", ['para', '[', '1', ']']),
        ("para[last()]", ['para', '[', 'last', '()', ']']),
        ("*/para", ['*', '/', 'para']),
        ("/doc/chapter[5]/section[2]",
         ['/', 'doc', '/', 'chapter', '[', '5', ']',
          '/', 'section', '[', '2', ']']),
        ("chapter//para", ['chapter', '//', 'para']),
        ("//para", ['//', 'para']),
        ("//olist/item", ['//', 'olist', '/', 'item']),
        (".", ['.']),
        (".//para", ['.', '//', 'para']),
        ("..", ['..']),
        ("../@lang", ['..', '/', '@', 'lang']),
        ("chapter[title]", ['chapter', '[', 'title', ']']),
        ("employee[@secretary and @assistant]", ['employee',
         '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']),
    ]
    for path, expected in specification_cases:
        check(path, expected)

    # additional tests
    additional_cases = [
        ("@{ns}attr", ['@', '{ns}attr']),
        ("{http://spam}egg", ['{http://spam}egg']),
        ("./spam.egg", ['.', '/', 'spam.egg']),
        (".//{http://spam}egg", ['.', '//', '{http://spam}egg']),
    ]
    for path, expected in additional_cases:
        check(path, expected)

    # wildcard tags
    wildcard_cases = [
        ("{ns}*", ['{ns}*']),
        ("{}*", ['{}*']),
        ("{*}tag", ['{*}tag']),
        ("{*}*", ['{*}*']),
        (".//{*}tag", ['.', '//', '{*}tag']),
    ]
    for path, expected in wildcard_cases:
        check(path, expected)

    # namespace prefix resolution
    xsd = 'http://www.w3.org/2001/XMLSchema'
    prefix_cases = [
        ("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
         {'xsd': xsd}),
        ("type", ['{http://www.w3.org/2001/XMLSchema}type'],
         {'': xsd}),
        ("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
         {'xsd': xsd}),
        ("@type", ['@', 'type'],
         {'': xsd}),
        ("@{*}type", ['@', '{*}type'],
         {'': xsd}),
        ("@{ns}attr", ['@', '{ns}attr'],
         {'': xsd,
          'ns': xsd}),
    ]
    for path, expected, namespaces in prefix_cases:
        check(path, expected, namespaces)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194
def test_processinginstruction(self):
    # Test ProcessingInstruction directly

    # Both spellings are expected to serialize the same way.
    for factory in (ET.ProcessingInstruction, ET.PI):
        self.assertEqual(ET.tostring(factory('test', 'instruction')),
                         b'<?test instruction?>')

    # Issue #2746

    markup_pi = ET.PI('test', '<testing&>')
    self.assertEqual(ET.tostring(markup_pi), b'<?test <testing&>?>')
    latin_pi = ET.PI('test', '<testing&>\xe3')
    self.assertEqual(ET.tostring(latin_pi, 'latin-1'),
                     b"<?xml version='1.0' encoding='latin-1'?>\n"
                     b"<?test <testing&>\xe3?>")
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001210
def test_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    empty_elements = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
                      'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
    for name in empty_elements:
        # Check the upper-case name and its lower-case form.
        for tag in (name, name.lower()):
            expected = '<%s>' % tag
            # Self-closed and explicitly closed input must serialize alike.
            for markup in ('<%s />' % tag, '<%s></%s>' % (tag, tag)):
                serialized = serialize(ET.XML(markup), method='html')
                self.assertEqual(serialized, expected)
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001223
def test_dump_attribute_order(self):
    # See BPO 34160
    # The dumped attributes must appear in the order they were passed.
    element = ET.Element('cirriculum', status='public', company='example')
    with support.captured_stdout() as captured:
        ET.dump(element)
    dumped = captured.getvalue()
    self.assertEqual(dumped,
                     '<cirriculum status="public" company="example" />\n')
1231
def test_tree_write_attribute_order(self):
    # See BPO 34160
    # Attribute order must be kept by both the default and HTML output.
    root = ET.Element('cirriculum', status='public', company='example')
    as_xml = serialize(root)
    self.assertEqual(as_xml,
                     '<cirriculum status="public" company="example" />')
    as_html = serialize(root, method='html')
    self.assertEqual(as_html,
                     '<cirriculum status="public" company="example"></cirriculum>')
Raymond Hettingere3685fd2018-10-28 11:18:22 -07001239
Fredrik Lundh8911ca3d2005-12-16 22:07:17 +00001240
Eli Benderskyb5869342013-08-30 05:51:20 -07001241class XMLPullParserTest(unittest.TestCase):
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001242
1243 def _feed(self, parser, data, chunk_size=None):
1244 if chunk_size is None:
Eli Benderskyb5869342013-08-30 05:51:20 -07001245 parser.feed(data)
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001246 else:
1247 for i in range(0, len(data), chunk_size):
Eli Benderskyb5869342013-08-30 05:51:20 -07001248 parser.feed(data[i:i+chunk_size])
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001249
def assert_events(self, parser, expected, max_events=None):
    # Read at most max_events pending events (all of them when None) and
    # compare them, reduced to (event, (tag, text)), with *expected*.
    pending = islice(parser.read_events(), max_events)
    observed = [(kind, (node.tag, node.text)) for kind, node in pending]
    self.assertEqual(observed, expected)
1255
def assert_event_tuples(self, parser, expected, max_events=None):
    # Read at most max_events pending events (all of them when None) and
    # compare the raw event tuples with *expected*.
    observed = list(islice(parser.read_events(), max_events))
    self.assertEqual(observed, expected)
1260
def assert_event_tags(self, parser, expected, max_events=None):
    # Read at most max_events pending events (all of them when None) and
    # compare them, reduced to (action, tag), with *expected*.
    observed = []
    for action, elem in islice(parser.read_events(), max_events):
        observed.append((action, elem.tag))
    self.assertEqual(observed, expected)
1265
def test_simple_xml(self):
    # Feed a small document piece by piece to a default XMLPullParser and
    # check which events are pending after each piece.  The same sequence
    # runs with the data fed whole (None) and in chunks of 1 and 5.
    for chunk_size in (None, 1, 5):
        with self.subTest(chunk_size=chunk_size):
            parser = ET.XMLPullParser()
            # Nothing has been fed yet.
            self.assert_event_tags(parser, [])
            self._feed(parser, "<!-- comment -->\n", chunk_size)
            self.assert_event_tags(parser, [])
            # The first <element> is fed without the '>' of its end tag.
            self._feed(parser,
                       "<root>\n <element key='value'>text</element",
                       chunk_size)
            self.assert_event_tags(parser, [])
            # Its 'end' event is expected once the closing '>' arrives.
            self._feed(parser, ">\n", chunk_size)
            self.assert_event_tags(parser, [('end', 'element')])
            self._feed(parser, "<element>text</element>tail\n", chunk_size)
            self._feed(parser, "<empty-element/>\n", chunk_size)
            self.assert_event_tags(parser, [
                ('end', 'element'),
                ('end', 'empty-element'),
                ])
            self._feed(parser, "</root>\n", chunk_size)
            self.assert_event_tags(parser, [('end', 'root')])
            self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001288
def test_feed_while_iterating(self):
    # One read_events() iterator, created before any data is fed, must
    # keep yielding events for data fed between next() calls.
    parser = ET.XMLPullParser()
    events = parser.read_events()
    self._feed(parser, "<root>\n <element key='value'>text</element>\n")
    kind, node = next(events)
    self.assertEqual((kind, node.tag), ('end', 'element'))
    self._feed(parser, "</root>\n")
    kind, node = next(events)
    self.assertEqual((kind, node.tag), ('end', 'root'))
    # No further events are pending.
    with self.assertRaises(StopIteration):
        next(events)
1300
def test_simple_xml_with_ns(self):
    # Same step-by-step sequence as for the plain document, but the root
    # declares a default namespace, so the expected tags are in
    # '{namespace}name' form.
    parser = ET.XMLPullParser()
    self.assert_event_tags(parser, [])
    self._feed(parser, "<!-- comment -->\n")
    self.assert_event_tags(parser, [])
    self._feed(parser, "<root xmlns='namespace'>\n")
    self.assert_event_tags(parser, [])
    # The first <element> is fed without the '>' of its end tag.
    self._feed(parser, "<element key='value'>text</element")
    self.assert_event_tags(parser, [])
    # Its 'end' event is expected once the closing '>' arrives.
    self._feed(parser, ">\n")
    self.assert_event_tags(parser, [('end', '{namespace}element')])
    self._feed(parser, "<element>text</element>tail\n")
    self._feed(parser, "<empty-element/>\n")
    self.assert_event_tags(parser, [
        ('end', '{namespace}element'),
        ('end', '{namespace}empty-element'),
        ])
    self._feed(parser, "</root>\n")
    self.assert_event_tags(parser, [('end', '{namespace}root')])
    self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001321
def test_ns_events(self):
    # Only namespace events are requested, so element events must not
    # appear among the pending events.
    parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
    for piece in ("<!-- comment -->\n", "<root xmlns='namespace'>\n"):
        self._feed(parser, piece)
    pending = list(parser.read_events())
    self.assertEqual(pending, [('start-ns', ('', 'namespace'))])
    remainder = (
        "<element key='value'>text</element",
        ">\n",
        "<element>text</element>tail\n",
        "<empty-element/>\n",
        "</root>\n",
    )
    for piece in remainder:
        self._feed(parser, piece)
    pending = list(parser.read_events())
    self.assertEqual(pending, [('end-ns', None)])
    self.assertIsNone(parser.close())
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001336
def test_ns_events_start(self):
    # With 'start-ns', 'start' and 'end' requested, the two namespace
    # declarations on <tag> are expected as 'start-ns' events ahead of the
    # 'start' event of <tag> itself.
    parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
    self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
    # max_events limits how many pending events each check consumes, so
    # the 'start' event is left for the second check.
    self.assert_event_tuples(parser, [
        ('start-ns', ('', 'abc')),
        ('start-ns', ('p', 'xyz')),
    ], max_events=2)
    self.assert_event_tags(parser, [
        ('start', '{abc}tag'),
    ], max_events=1)

    self._feed(parser, "<child />\n")
    self.assert_event_tags(parser, [
        ('start', '{abc}child'),
        ('end', '{abc}child'),
    ])

    self._feed(parser, "</tag>\n")
    # The final event is read only after close() has been called.
    parser.close()
    self.assert_event_tags(parser, [
        ('end', '{abc}tag'),
    ])
1359
def test_ns_events_start_end(self):
    # Same document as the 'start-ns'/'start'/'end' case, with 'end-ns'
    # requested as well: after the 'end' of <tag>, one 'end-ns' event is
    # expected per declared namespace.
    parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
    self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
    # max_events limits how many pending events each check consumes, so
    # the 'start' event is left for the second check.
    self.assert_event_tuples(parser, [
        ('start-ns', ('', 'abc')),
        ('start-ns', ('p', 'xyz')),
    ], max_events=2)
    self.assert_event_tags(parser, [
        ('start', '{abc}tag'),
    ], max_events=1)

    self._feed(parser, "<child />\n")
    self.assert_event_tags(parser, [
        ('start', '{abc}child'),
        ('end', '{abc}child'),
    ])

    self._feed(parser, "</tag>\n")
    # The remaining events are read only after close() has been called.
    parser.close()
    self.assert_event_tags(parser, [
        ('end', '{abc}tag'),
    ], max_events=1)
    # The 'end-ns' events carry None rather than an element, hence the
    # raw-tuple comparison.
    self.assert_event_tuples(parser, [
        ('end-ns', None),
        ('end-ns', None),
    ])
1386
def test_events(self):
    # Markup containing an element in the 'foo' namespace; fed to both the
    # start/end parser and the start-only parser below.
    namespaced = "<element xmlns='foo'>text<empty-element/></element>tail\n"

    # An empty event selection reports nothing at all.
    pull = ET.XMLPullParser(events=())
    self._feed(pull, "<root/>\n")
    self.assert_event_tags(pull, [])

    # A comment produces no event when only 'start'/'end' are selected.
    pull = ET.XMLPullParser(events=('start', 'end'))
    self._feed(pull, "<!-- text here -->\n")
    self.assert_events(pull, [])

    # 'start' and 'end' together, fed piecewise.
    pull = ET.XMLPullParser(events=('start', 'end'))
    self._feed(pull, "<root>\n")
    self.assert_event_tags(pull, [('start', 'root')])
    self._feed(pull, "<element key='value'>text</element")
    self.assert_event_tags(pull, [('start', 'element')])
    # The 'end' event is expected only after the closing '>' arrives.
    self._feed(pull, ">\n")
    self.assert_event_tags(pull, [('end', 'element')])
    self._feed(pull, namespaced)
    expected = [
        ('start', '{foo}element'),
        ('start', '{foo}empty-element'),
        ('end', '{foo}empty-element'),
        ('end', '{foo}element'),
    ]
    self.assert_event_tags(pull, expected)
    self._feed(pull, "</root>")
    self.assertIsNone(pull.close())
    self.assert_event_tags(pull, [('end', 'root')])

    # 'start' alone: the same input, but no 'end' events are expected.
    pull = ET.XMLPullParser(events=('start',))
    self._feed(pull, "<!-- comment -->\n")
    self.assert_event_tags(pull, [])
    self._feed(pull, "<root>\n")
    self.assert_event_tags(pull, [('start', 'root')])
    self._feed(pull, "<element key='value'>text</element")
    self.assert_event_tags(pull, [('start', 'element')])
    self._feed(pull, ">\n")
    self.assert_event_tags(pull, [])
    self._feed(pull, namespaced)
    expected = [
        ('start', '{foo}element'),
        ('start', '{foo}empty-element'),
    ]
    self.assert_event_tags(pull, expected)
    self._feed(pull, "</root>")
    self.assertIsNone(pull.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001432
def test_events_comment(self):
    def comment(text):
        # Expected shape of a single 'comment' event carrying *text*.
        return ('comment', (ET.Comment, text))

    # Comments before, inside and after the root element, interleaved
    # with ordinary 'start'/'end' events.
    pull = ET.XMLPullParser(events=('start', 'comment', 'end'))
    self._feed(pull, "<!-- text here -->\n")
    self.assert_events(pull, [comment(' text here ')])
    self._feed(pull, "<!-- more text here -->\n")
    self.assert_events(pull, [comment(' more text here ')])
    self._feed(pull, "<root-tag>text")
    self.assert_event_tags(pull, [('start', 'root-tag')])
    self._feed(pull, "<!-- inner comment-->\n")
    self.assert_events(pull, [comment(' inner comment')])
    self._feed(pull, "</root-tag>\n")
    self.assert_event_tags(pull, [('end', 'root-tag')])
    self._feed(pull, "<!-- outer comment -->\n")
    self.assert_events(pull, [comment(' outer comment ')])

    # 'comment' as the only selected event.
    pull = ET.XMLPullParser(events=('comment',))
    self._feed(pull, "<!-- text here -->\n")
    self.assert_events(pull, [comment(' text here ')])
1451
def test_events_pi(self):
    def pi(text):
        # Expected shape of a single 'pi' event carrying *text*.
        return ('pi', (ET.PI, text))

    # Processing instruction with a target only.
    pull = ET.XMLPullParser(events=('start', 'pi', 'end'))
    self._feed(pull, "<?pitarget?>\n")
    self.assert_events(pull, [pi('pitarget')])

    # Processing instruction with a target and text; 'pi' is the only
    # selected event.
    pull = ET.XMLPullParser(events=('pi',))
    self._feed(pull, "<?pitarget some text ?>\n")
    self.assert_events(pull, [pi('pitarget some text ')])
1459
def test_events_sequence(self):
    # The events argument may be any iterable, not just a tuple or list.
    document = "<foo>bar</foo>"
    expected = [('start', 'foo'), ('end', 'foo')]

    # A set of event names.
    pull = ET.XMLPullParser(events={'end', 'start'})
    self._feed(pull, document)
    self.assert_event_tags(pull, expected)

    class OneShotEvents:
        # Minimal hand-written iterator over three event names.
        def __init__(self):
            self._names = iter(['start', 'end', 'start-ns'])

        def __iter__(self):
            return self

        def __next__(self):
            return next(self._names)

    # A plain iterator object.
    pull = ET.XMLPullParser(events=OneShotEvents())
    self._feed(pull, document)
    self.assert_event_tags(pull, expected)
1478
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001479 def test_unknown_event(self):
1480 with self.assertRaises(ValueError):
Eli Benderskyb5869342013-08-30 05:51:20 -07001481 ET.XMLPullParser(events=('start', 'end', 'bogus'))
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001482
1483
Armin Rigo9ed73062005-12-14 18:10:45 +00001484#
1485# xinclude tests (samples from appendix C of the xinclude specification)
1486
# In-memory resources for the XInclude tests, keyed by the href used to
# request them.
XINCLUDE = {
    # Basic inclusion (C.1) and the document it pulls in.
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml"/>
</document>
""",
    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
<p>The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.</p>
</disclaimer>
""",
    # Textual inclusion (C.2) and the text it pulls in.
    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been accessed
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",
    "count.txt": "324387",
    # Variant of C.2 where the include follows a sibling element.
    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been <em>accessed</em>
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",
    # Textual inclusion of an XML document (C.3) and that document.
    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source of the "data.xml" resource:</p>
<example><xi:include href="data.xml" parse="text"/></example>
</document>
""",
    "data.xml": """\
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
""",
    # Nested fallback example (C.5); neither href exists in this table.
    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="example.txt" parse="text">
<xi:fallback>
<xi:include href="fallback-example.txt" parse="text">
<xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
</xi:include>
</xi:fallback>
</xi:include>
</div>
""",
}
1551
# Document that includes the on-disk sample file; the path is escaped
# (quotes included) because it is substituted into an attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Example.</p>
<xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, quote=True))
Armin Rigo9ed73062005-12-14 18:10:45 +00001559
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001560#
1561# badly formatted xi:include tags
1562
# Documents whose xi: markup is malformed, keyed by name.
XINCLUDE_BAD = {
    # xi:include with an unknown parse type.
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",
    # xi:fallback that is not a child of xi:include.
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:fallback></xi:fallback>
</div>
""",
}
1579
class XIncludeTest(unittest.TestCase):
    # Runs ElementInclude against the XINCLUDE / XINCLUDE_BAD tables.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader backed by the XINCLUDE table.  An unknown href raises
        # OSError; XML resources are parsed, anything else is returned
        # as stored.
        try:
            resource = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        return ET.XML(resource) if parse == "xml" else resource

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def _my_loader(self, href, parse):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        if parse != 'xml':
            return None
        with open(href, 'rb') as stream:
            return ET.parse(stream).getroot()

    def _expand(self, name):
        # Load *name* from the table, process its includes with the table
        # loader and return the serialized result.
        from xml.etree import ElementInclude
        tree = self.xinclude_loader(name)
        ElementInclude.include(tree, self.xinclude_loader)
        return serialize(tree)

    def _assert_fatal(self, source, message):
        # Parse *source*, run include() with a loader that finds nothing
        # and check the FatalIncludeError text.
        from xml.etree import ElementInclude
        tree = ET.XML(source)
        with self.assertRaises(ElementInclude.FatalIncludeError) as caught:
            ElementInclude.include(tree, loader=self.none_loader)
        self.assertEqual(str(caught.exception), message)

    def test_xinclude_default(self):
        from xml.etree import ElementInclude
        tree = self.xinclude_loader('default.xml')
        ElementInclude.include(tree, self._my_loader)
        expected = (
            '<document>\n'
            ' <p>Example.</p>\n'
            ' <root>\n'
            ' <element key="value">text</element>\n'
            ' <element>text</element>tail\n'
            ' <empty-element />\n'
            '</root>\n'
            '</document>')
        self.assertEqual(serialize(tree), expected)

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        expected_c1 = (
            '<document>\n'
            ' <p>120 Mz is adequate for an average home user.</p>\n'
            ' <disclaimer>\n'
            ' <p>The opinions represented herein represent those of the individual\n'
            ' and should not be interpreted as official policy endorsed by this\n'
            ' organization.</p>\n'
            '</disclaimer>\n'
            '</document>')
        self.assertEqual(self._expand("C1.xml"), expected_c1)

        # Textual inclusion example (XInclude C.2)
        expected_c2 = (
            '<document>\n'
            ' <p>This document has been accessed\n'
            ' 324387 times.</p>\n'
            '</document>')
        self.assertEqual(self._expand("C2.xml"), expected_c2)

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        expected_c2b = (
            '<document>\n'
            ' <p>This document has been <em>accessed</em>\n'
            ' 324387 times.</p>\n'
            '</document>')
        self.assertEqual(self._expand("C2b.xml"), expected_c2b)

        # Textual inclusion of XML example (XInclude C.3)
        expected_c3 = (
            '<document>\n'
            ' <p>The following is the source of the "data.xml" resource:</p>\n'
            " <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            ' &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>')
        self.assertEqual(self._expand("C3.xml"), expected_c3)

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        tree = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as caught:
            ElementInclude.include(tree, self.xinclude_loader)
        self.assertEqual(str(caught.exception), 'resource not found')
        # The document is expected to be left as it was loaded.
        expected_c5 = (
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            ' <ns0:include href="example.txt" parse="text">\n'
            ' <ns0:fallback>\n'
            ' <ns0:include href="fallback-example.txt" parse="text">\n'
            ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            ' </ns0:include>\n'
            ' </ns0:fallback>\n'
            ' </ns0:include>\n'
            '</div>')
        self.assertEqual(serialize(tree), expected_c5)

    def test_xinclude_failures(self):
        # Test failure to locate included XML file.
        self._assert_fatal(
            XINCLUDE["C1.xml"],
            "cannot load 'disclaimer.xml' as 'xml'")

        # Test failure to locate included text file.
        self._assert_fatal(
            XINCLUDE["C2.xml"],
            "cannot load 'count.txt' as 'text'")

        # Test bad parse type.
        self._assert_fatal(
            XINCLUDE_BAD["B1.xml"],
            "unknown parse type in xi:include tag ('BAD_TYPE')")

        # Test xi:fallback outside xi:include.
        self._assert_fatal(
            XINCLUDE_BAD["B2.xml"],
            "xi:fallback tag must be child of xi:include "
            "('{http://www.w3.org/2001/XInclude}fallback')")
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001712
1713# --------------------------------------------------------------------
1714# reported bugs
1715
class BugsTest(unittest.TestCase):
    # Regression tests for reported bugs.

    def test_bug_xmltoolkit21(self):
        # marshaller gives obscure errors for non-string values

        def check(elem):
            with self.assertRaises(TypeError) as cm:
                serialize(elem)
            self.assertEqual(str(cm.exception),
                    'cannot serialize 123 (type int)')

        elem = ET.Element(123)
        check(elem) # tag

        elem = ET.Element("elem")
        elem.text = 123
        check(elem) # text

        elem = ET.Element("elem")
        elem.tail = 123
        check(elem) # tail

        elem = ET.Element("elem")
        elem.set(123, "123")
        check(elem) # attribute key

        elem = ET.Element("elem")
        elem.set("123", 123)
        check(elem) # attribute value

    def test_bug_xmltoolkit25(self):
        # typo in ElementTree.findtext

        elem = ET.XML(SAMPLE_XML)
        tree = ET.ElementTree(elem)
        self.assertEqual(tree.findtext("tag"), 'text')
        self.assertEqual(tree.findtext("section/tag"), 'subtext')

    def test_bug_xmltoolkit28(self):
        # .//tag causes exceptions

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])

    def test_bug_xmltoolkitX1(self):
        # dump() doesn't flush the output buffer

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        with support.captured_stdout() as stdout:
            ET.dump(tree)
            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')

    def test_bug_xmltoolkit39(self):
        # non-ascii element and attribute names don't work

        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b"<tag \xe4ttr='v&#228;lue' />")
        self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b'<t\xe4g>text</t\xe4g>')
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<t\xc3\xa4g>text</t\xc3\xa4g>')

        tree = ET.Element("t\u00e4g")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        tree = ET.Element("tag")
        tree.set("\u00e4ttr", "v\u00e4lue")
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    def test_bug_xmltoolkit54(self):
        # problems handling internally defined entities

        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
                   '<doc>&ldots;</doc>')
        self.assertEqual(serialize(e, encoding="us-ascii"),
                b'<doc>&#33328;</doc>')
        self.assertEqual(serialize(e), '<doc>\u8230</doc>')

    def test_bug_xmltoolkit55(self):
        # make sure we're reporting the first error, not the last

        with self.assertRaises(ET.ParseError) as cm:
            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                   b'<doc>&ldots;&ndots;&rdots;</doc>')
        self.assertEqual(str(cm.exception),
                'undefined entity &ldots;: line 1, column 36')

    def test_bug_xmltoolkit60(self):
        # Handle crash in stream source.

        class ExceptionFile:
            def read(self, x):
                raise OSError

        self.assertRaises(OSError, ET.parse, ExceptionFile())

    def test_bug_xmltoolkit62(self):
        # Don't crash when using custom entities.

        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
        parser = ET.XMLParser()
        parser.entity.update(ENTITIES)
        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
        t = parser.close()
        self.assertEqual(t.find('.//paragraph').text,
            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')

    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
    def test_bug_xmltoolkit63(self):
        # Check reference leak.
        def xmltoolkit63():
            tree = ET.TreeBuilder()
            tree.start("tag", {})
            tree.data("text")
            tree.end("tag")

        # Run once before sampling the refcount, then verify that many
        # further runs leave it unchanged.
        xmltoolkit63()
        count = sys.getrefcount(None)
        for _ in range(1000):
            xmltoolkit63()
        self.assertEqual(sys.getrefcount(None), count)

    def test_bug_200708_newline(self):
        # Preserve newlines in attributes.

        e = ET.Element('SomeTag', text="def _f():\n return 3\n")
        self.assertEqual(ET.tostring(e),
                b'<SomeTag text="def _f():&#10; return 3&#10;" />')
        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
                'def _f():\n return 3\n')
        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
                b'<SomeTag text="def _f():&#10; return 3&#10;" />')

    def test_bug_200708_close(self):
        # Test default builder.
        parser = ET.XMLParser() # default
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

        # Test custom builder.
        class EchoTarget:
            def close(self):
                return ET.Element("element") # simulate root
        parser = ET.XMLParser(target=EchoTarget())
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

    def test_bug_200709_default_namespace(self):
        # The children are only needed as part of the tree, so the
        # SubElement results are not bound to a name.
        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 1
                '<elem xmlns="default"><elem /></elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "{not-default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 2
            '<elem xmlns="default" xmlns:ns1="not-default">'
            '<elem />'
            '<ns1:elem />'
            '</elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "elem") # unprefixed name
        with self.assertRaises(ValueError) as cm:
            serialize(e, default_namespace="default") # 3
        self.assertEqual(str(cm.exception),
                'cannot use non-qualified names with default_namespace option')

    def test_bug_200709_register_namespace(self):
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

        # And the Dublin Core namespace is in the default list:

        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
        self.assertEqual(ET.tostring(e),
            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')

    def test_bug_200709_element_comment(self):
        # Not sure if this can be fixed, really (since the serializer needs
        # ET.Comment, not cET.comment).

        a = ET.Element('a')
        a.append(ET.Comment('foo'))
        self.assertEqual(a[0].tag, ET.Comment)

        a = ET.Element('a')
        a.append(ET.PI('foo'))
        self.assertEqual(a[0].tag, ET.PI)

    def test_bug_200709_element_insert(self):
        # insert() at index 0 and at a negative index.
        a = ET.Element('a')
        ET.SubElement(a, 'b')
        ET.SubElement(a, 'c')
        d = ET.Element('d')
        a.insert(0, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
        a.insert(-1, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])

    def test_bug_200709_iter_comment(self):
        # iter() filtered on ET.Comment finds a comment nested in a child.
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        comment_b = ET.Comment("TEST-b")
        b.append(comment_b)
        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])

    # --------------------------------------------------------------------
    # reported on bugs.python.org

    def test_bug_1534630(self):
        # data() is called before any element has been started; only the
        # value returned by close() is inspected.
        bob = ET.TreeBuilder()
        bob.data("data")
        bob.start("tag", {})
        bob.end("tag")
        e = bob.close()
        self.assertEqual(serialize(e), '<tag />')

    def test_issue6233(self):
        # Non-ASCII text becomes a character reference in ASCII output,
        # whatever the encoding of the input document.
        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
                   b'<body>t\xc3\xa3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')
        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                   b'<body>t\xe3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')

    def test_issue3151(self):
        # Namespace URI that itself contains braces.
        e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
        self.assertEqual(e.tag, '{${stuff}}localname')
        # The tree wrapper is still constructed, but its result was never
        # used, so it is not bound to a name.
        ET.ElementTree(e)
        self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')

    def test_issue6565(self):
        # Slice assignment replaces all children.
        elem = ET.XML("<body><tag/></body>")
        self.assertEqual(summarize_list(elem), ['tag'])
        newelem = ET.XML(SAMPLE_XML)
        elem[:] = newelem[:]
        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])

    def test_issue10777(self):
        # Registering a namespace twice caused a "dictionary changed size during
        # iteration" bug.

        ET.register_namespace('test10777', 'http://myuri/')
        ET.register_namespace('test10777', 'http://myuri/')

    def test_lost_text(self):
        # Issue #25902: Borrowed text can disappear
        class Text:
            def __bool__(self):
                # Replace the text while it is being tested for truth.
                e.text = 'changed'
                return True

        e = ET.Element('tag')
        e.text = Text()
        i = e.itertext()
        t = next(i)
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e.text, str)
        self.assertEqual(e.text, 'changed')

    def test_lost_tail(self):
        # Issue #25902: Borrowed tail can disappear
        class Text:
            def __bool__(self):
                # Replace the tail while it is being tested for truth.
                e[0].tail = 'changed'
                return True

        e = ET.Element('root')
        e.append(ET.Element('tag'))
        e[0].tail = Text()
        i = e.itertext()
        t = next(i)
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e[0].tail, str)
        self.assertEqual(e[0].tail, 'changed')

    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        class Tag:
            def __eq__(self, other):
                # Replace the element and advance the iterator while the
                # tag comparison is in progress.
                e[0] = ET.Element('changed')
                next(i)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        i = e.iter('tag')
        try:
            t = next(i)
        except ValueError:
            self.skipTest('generators are not reentrant')
        self.assertIsInstance(t.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')

    def check_expat224_utf8_bug(self, text):
        # Round-trip *text* (UTF-8 bytes) through an attribute value.
        xml = b'<a b="%s"/>' % text
        root = ET.XML(xml)
        self.assertEqual(root.get('b'), text.decode('utf-8'))

    def test_expat224_utf8_bug(self):
        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
        # Check that Expat 2.2.4 fixed the bug.
        #
        # Test buffer bounds at odd and even positions.

        text = b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

        text = b'x' + b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

    def test_expat224_utf8_bug_file(self):
        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
            raw = fp.read()
        root = ET.fromstring(raw)
        xmlattr = root.get('b')

        # "Parse" manually the XML file to extract the value of the 'b'
        # attribute of the <a b='xxx' /> XML element
        text = raw.decode('utf-8').strip()
        text = text.replace('\r\n', ' ')
        text = text[6:-4]
        # Compare the value already fetched instead of looking it up again.
        self.assertEqual(xmlattr, text)
2068
2069
Antoine Pitrou5b235d02013-04-18 19:37:06 +02002070
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002071# --------------------------------------------------------------------
2072
2073
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Construction, copying, type checking, garbage collection, weak
    # references and pickling of ET.Element.

    def test___init__(self):
        tag = "foo"
        attrib = {"zix": "wyp"}

        element_foo = ET.Element(tag, attrib)

        # traits of an element
        self.assertIsInstance(element_foo, ET.Element)
        self.assertIn("tag", dir(element_foo))
        self.assertIn("attrib", dir(element_foo))
        self.assertIn("text", dir(element_foo))
        self.assertIn("tail", dir(element_foo))

        # string attributes have expected values
        self.assertEqual(element_foo.tag, tag)
        self.assertIsNone(element_foo.text)
        self.assertIsNone(element_foo.tail)

        # attrib is a copy
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertEqual(element_foo.attrib, attrib)

        # attrib isn't linked
        attrib["bar"] = "baz"
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertNotEqual(element_foo.attrib, attrib)

    def test___copy__(self):
        element_foo = ET.Element("foo", {"zix": "wyp"})
        element_foo.append(ET.Element("bar", {"baz": "qix"}))

        element_foo2 = copy.copy(element_foo)

        # elements are not the same
        self.assertIsNot(element_foo2, element_foo)

        # string attributes are equal
        self.assertEqual(element_foo2.tag, element_foo.tag)
        self.assertEqual(element_foo2.text, element_foo.text)
        self.assertEqual(element_foo2.tail, element_foo.tail)

        # number of children is the same
        self.assertEqual(len(element_foo2), len(element_foo))

        # children are the same (a shallow copy shares the child objects)
        for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
            self.assertIs(child1, child2)

        # attrib compares equal (identity is deliberately not checked here)
        self.assertEqual(element_foo2.attrib, element_foo.attrib)

    def test___deepcopy__(self):
        element_foo = ET.Element("foo", {"zix": "wyp"})
        element_foo.append(ET.Element("bar", {"baz": "qix"}))

        element_foo2 = copy.deepcopy(element_foo)

        # elements are not the same
        self.assertIsNot(element_foo2, element_foo)

        # string attributes are equal
        self.assertEqual(element_foo2.tag, element_foo.tag)
        self.assertEqual(element_foo2.text, element_foo.text)
        self.assertEqual(element_foo2.tail, element_foo.tail)

        # number of children is the same
        self.assertEqual(len(element_foo2), len(element_foo))

        # children are not the same
        for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
            self.assertIsNot(child1, child2)

        # attrib is a copy
        self.assertIsNot(element_foo2.attrib, element_foo.attrib)
        self.assertEqual(element_foo2.attrib, element_foo.attrib)

        # attrib isn't linked
        element_foo.attrib["bar"] = "baz"
        self.assertIsNot(element_foo2.attrib, element_foo.attrib)
        self.assertNotEqual(element_foo2.attrib, element_foo.attrib)

    def test_augmentation_type_errors(self):
        # Every way of adding a child must reject non-Element objects.
        e = ET.Element('joe')
        self.assertRaises(TypeError, e.append, 'b')
        self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
        self.assertRaises(TypeError, e.insert, 0, 'foo')
        e[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            e[0] = 'foo'
        with self.assertRaises(TypeError):
            e[:] = [ET.Element('bar'), 'foo']

        # The following hooks are only checked when the implementation
        # under test provides them.
        if hasattr(e, '__setstate__'):
            state = {
                'tag': 'tag',
                '_children': [None],  # non-Element
                'attrib': 'attr',
                'tail': 'tail',
                'text': 'text',
            }
            self.assertRaises(TypeError, e.__setstate__, state)

        if hasattr(e, '__deepcopy__'):
            class E(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            e[:] = [E('bar')]
            self.assertRaises(TypeError, copy.deepcopy, e)

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e3.append(e1)
        e2.append(e3)
        e1.append(e2)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        flag = False

        def wref_cb(w):
            nonlocal flag
            flag = True

        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        # Do not rely on reference counting to finalize the element right
        # away; force a collection so the callback has run on any GC.
        gc_collect()
        self.assertTrue(flag)
        self.assertIsNone(wref())

    def test_get_keyword_args(self):
        # Element.get() accepts its fallback value as the 'default' keyword.
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        #
        # Every (dumper, loader) pairing of self.modules is exercised for
        # each pickle protocol from 2 upwards.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                e = dumper.Element('foo', bar=42)
                e.text = "text goes here"
                e.tail = "opposite of head"
                dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
                e.append(dumper.Element('child'))
                e.findall('.//grandchild')[0].set('attr', 'other value')

                e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                          dumper, loader, proto)

                self.assertEqual(e2.tag, 'foo')
                self.assertEqual(e2.attrib['bar'], 42)
                self.assertEqual(len(e2), 2)
                self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        # The document text does not depend on the loop variables, so it is
        # defined once up front.
        XMLTEXT = """<?xml version="1.0"?>
            <group><dogs>4</dogs>
            </group>"""
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                e1 = dumper.fromstring(XMLTEXT)
                # __getstate__ is only inspected when the implementation
                # under test provides it.
                if hasattr(e1, '__getstate__'):
                    self.assertEqual(e1.__getstate__()['tag'], 'group')
                e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                          dumper, loader, proto)
                self.assertEqual(e2.tag, 'group')
                self.assertEqual(e2[0].tag, 'dogs')
Eli Benderskydd3661e2013-09-13 06:24:25 -07002269
Eli Bendersky23687042013-02-26 05:53:23 -08002270
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Regression tests in which a helper object mutates the container or
    # element being operated on from inside a hook (__class__, __eq__,
    # __del__, __index__) or supplies a bogus element factory.  Several of
    # them make no assertion at all: finishing without a crash is the check.

    def test_extend_mutable_list(self):
        # X's __class__ property replaces the contents of L and then claims
        # to be an ET.Element.
        class X:
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        try:
            e.extend(L)
        except TypeError:
            # Rejecting the fake element is an acceptable outcome too.
            pass

        # Same trick, but on a real Element subclass.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        # Like test_extend_mutable_list, but the hook empties L instead of
        # replacing its contents.
        class X:
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        try:
            e.extend(L)
        except TypeError:
            # Rejecting the fake element is an acceptable outcome too.
            pass

        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        # X.__eq__ removes all children of e and reports "not equal".
        class X(ET.Element):
            def __eq__(self, o):
                del e[:]
                return False
        # X as the existing child ...
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # ... and X as the element passed to remove().
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        # The element is temporarily made its own tag.
        e = ET.Element('foo')
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        # X.__del__ reads elem.text; NameError is tolerated because `elem`
        # may not be bound yet when the finalizer runs.
        class X(str):
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('tag', {})
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        elem = b.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        # Same as test_element_get_text, but the data chunks follow the
        # closed <tag/> child and therefore end up in its tail.
        class X(str):
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_subscr(self):
        # Issue #27863
        # X.__index__ empties e while a slice bound/step is being computed.
        class X:
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        # X.__index__ empties e while the slice of an assignment target is
        # being computed.
        class X:
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The factory hands back plain lists instead of elements.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        # Drop the builder and collect so that its teardown runs inside
        # this test.
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # Same as test_treebuilder_start, but the failure is triggered by
        # end() rather than by a second start().
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.end, 'tag')
        del b
        gc_collect()
2420
2421
class MutatingElementPath(str):
    # A path string bound to an element: every equality test deletes all
    # of that element's children and then reports a match.

    # A class that defines __eq__ loses the inherited hash unless __hash__
    # is set explicitly, so the str hash is restored here.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
2431
class BadElementPath(str):
    # A path string for which every equality comparison fails with
    # ZeroDivisionError.

    # A class that defines __eq__ loses the inherited hash unless __hash__
    # is set explicitly, so the str hash is restored here.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # Raise the exception explicitly instead of relying on a division
        # by zero inside a `raise` expression, which hides where the error
        # actually comes from.
        raise ZeroDivisionError('division by zero')
2436
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # Run find(), findtext() and findall() with path objects whose __eq__
    # either mutates the element or raises.  No result is asserted: the
    # calls only have to return, or fail with ZeroDivisionError.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Start every test with an empty path cache; the previous cache is
        # kept so tearDown() can put it back.
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    @staticmethod
    def _foo_with_bar():
        # Build the <foo><bar/></foo> element shared by all tests.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        root = self._foo_with_bar()
        root.find(MutatingElementPath(root, 'x'))

    def test_find_with_error(self):
        root = self._foo_with_bar()
        try:
            root.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        root = self._foo_with_bar()
        root.findtext(MutatingElementPath(root, 'x'))

    def test_findtext_with_error(self):
        root = self._foo_with_bar()
        try:
            root.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        root = self._foo_with_bar()
        root.findall(MutatingElementPath(root, 'x'))

    def test_findall_with_error(self):
        root = self._foo_with_bar()
        try:
            root.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2487
2488
class ElementTreeTypeTest(unittest.TestCase):
    # The public ElementTree names must be real classes, and ET.Element
    # must be subclassable.

    def test_istype(self):
        public_types = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for candidate in public_types:
            self.assertIsInstance(candidate, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        self.assertIsInstance(instance, ET.Element)
        self.assertIsInstance(instance, MyElement)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        class MyElement(ET.Element):
            # The signature, including the shared default dict, is the one
            # under test; the default is only passed through, never
            # modified here.
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(instance.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        class MyElement(ET.Element):
            pass

        # A subclass instance used as a child must be found by the
        # find*() methods of a plain parent element.
        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)
        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        matches = list(parent.findall('bar'))
        self.assertEqual(len(matches), 1, matches)
        self.assertEqual(matches[0].tag, 'bar')
2543
Eli Benderskyceab1a92013-01-12 07:42:46 -08002544
class ElementFindTest(unittest.TestCase):
    # Path queries through Element.find(), findtext() and findall(), and
    # through the same methods of ElementTree.

    def _assert_findall(self, elem, cases):
        # For every (path, expected) pair, check that elem.findall(path)
        # summarizes to *expected*.  The path is passed as the failure
        # message so a failing case identifies itself.
        for path, expected in cases:
            self.assertEqual(summarize_list(elem.findall(path)), expected,
                             path)

    def test_find_simple(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(e.find('tag').tag, 'tag')
        self.assertEqual(e.find('section/tag').tag, 'tag')
        self.assertEqual(e.find('./tag').tag, 'tag')

        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(e.findtext('./tag'), 'text')
        self.assertEqual(e.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(e.findtext('section/nexttag'), '')
        self.assertEqual(e.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(e.findtext('tog'))
        self.assertEqual(e.findtext('tog', 'default'), 'default')

        # Issue #16922
        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        e = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        for path, expected_class in [
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        ]:
            self.assertEqual(e.find(path).attrib['class'], expected_class,
                             path)

        # Positions that cannot address an element are rejected outright.
        for bad_path in ('./tag[0]', './tag[-1]',
                         './tag[last()-0]', './tag[last()+1]'):
            self.assertRaisesRegex(SyntaxError, 'XPath', e.find, bad_path)

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)
        self._assert_findall(e, [
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', ['tag', 'nexttag', 'nextsection']),
            ('section//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/*', ['tag', 'nexttag', 'nextsection']),
            ('*//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
        ])

        # attribute, child and parent predicates
        self._assert_findall(e, [
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        ])
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

        # child-text predicates, with varying whitespace around '='
        self._assert_findall(e, [
            (".//section[tag='subtext']", ['section']),
            (".//section[tag ='subtext']", ['section']),
            (".//section[tag= 'subtext']", ['section']),
            (".//section[tag = 'subtext']", ['section']),
            (".//section[ tag = 'subtext' ]", ['section']),
        ])

        # own-text predicates; whitespace inside the quotes is significant
        self._assert_findall(e, [
            (".//tag[.='subtext']", ['tag']),
            (".//tag[. ='subtext']", ['tag']),
            ('.//tag[.= "subtext"]', ['tag']),
            ('.//tag[ . = "subtext" ]', ['tag']),
            (".//tag[. = 'subtext']", ['tag']),
            (".//tag[. = 'subtext ']", []),
            (".//tag[.= ' subtext']", []),
        ])

        # duplicate section => 2x tag matches
        e[1] = e[2]
        self._assert_findall(e, [
            (".//section[tag = 'subtext']", ['section', 'section']),
            (".//tag[. = 'subtext']", ['tag', 'tag']),
        ])

    # NOTE: the doubled "test_" prefix is kept so the test id stays stable.
    def test_test_find_with_ns(self):
        e = ET.XML(SAMPLE_XML_NS)
        self.assertEqual(summarize_list(e.findall('tag')), [])
        self.assertEqual(
            summarize_list(e.findall("{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 2)
        self.assertEqual(
            summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 3)

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # The same prefix resolves to different namespaces depending on the
        # mapping supplied with the query.
        nsmap = {'xx': 'X'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        nsmap = {'xx': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        # An empty prefix supplies the namespace for unprefixed names.
        nsmap = {'xx': 'X', '': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)

    def test_findall_wildcard(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        root.append(ET.Comment('test'))

        # direct children
        self._assert_findall(root, [
            ("{*}b", ['{X}b', 'b', '{Y}b']),
            ("{*}c", ['c']),
            ("{X}*", ['{X}b']),
            ("{Y}*", ['{Y}b']),
            ("{}*", ['b', 'c']),
            ("{}b", ['b']),  # only for consistency
            ("{*}*", ['{X}b', 'b', 'c', '{Y}b']),
        ])
        self.assertEqual(summarize_list(root.findall("{}b")),
                         summarize_list(root.findall("b")))
        # This is an unfortunate difference, but that's how find('*') works.
        self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
                         summarize_list(root.findall("*")))

        # all descendants
        self._assert_findall(root, [
            (".//{*}b", ['{X}b', 'b', '{X}b', 'b', '{Y}b']),
            (".//{*}c", ['c', 'c']),
            (".//{X}*", ['{X}b', '{X}b']),
            (".//{Y}*", ['{Y}b']),
            (".//{}*", ['c', 'b', 'c', 'b']),
            (".//{}b", ['b', 'b']),  # only for consistency
        ])
        self.assertEqual(summarize_list(root.findall(".//{}b")),
                         summarize_list(root.findall(".//b")))

    def test_bad_find(self):
        e = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
            e.findall('/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
                         ['tag'] * 2)
        # this produces a warning
        #
        # The sentence break is matched with \s+ so the pattern does not
        # depend on how many spaces the message puts after the full stop.
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               r"in a future version\.\s+If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            it = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(it), ['tag'] * 3)
Eli Benderskyc31f7732013-01-12 07:44:32 -08002757
Eli Benderskyceab1a92013-01-12 07:42:46 -08002758
class ElementIterTest(unittest.TestCase):
    # Element.iter(), itertext(), iterparse() and the deprecated
    # getiterator().

    def _ilist(self, elem, tag=None):
        # Summarize the elements produced by elem.iter(tag).
        return summarize_list(elem.iter(tag))

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(doc.find('body').itertext()),
                         'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
                         'end')

        # A tree without a root element cannot be iterated.
        tree = ET.ElementTree(None)
        self.assertRaises(AttributeError, tree.iter)

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        a = ET.Element('a')
        self.assertEqual(self._ilist(a), ['a'])

        # one child
        b = ET.SubElement(a, 'b')
        self.assertEqual(self._ilist(a), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(b, 'c')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(a, 'd')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])

        # replace first child by second
        a[0] = a[1]
        del a[1]
        self.assertEqual(self._ilist(a), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        self.assertEqual(
            summarize_list(doc.iter(tag='room')),
            ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    # Element.getiterator() is deprecated.
    #
    # The sentence break is matched with \s+ so the pattern does not depend
    # on how many spaces the message puts after the full stop.
    @checkwarnings((r"This method will be removed in future versions\.\s+"
                    "Use .+ instead.", DeprecationWarning))
    def test_getiterator(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(summarize_list(doc.getiterator('room')),
                         ['room'] * 3)
        self.assertEqual(summarize_list(doc.getiterator('house')),
                         ['house'] * 2)

        # test that getiterator also accepts 'tag' as a keyword arg
        self.assertEqual(
            summarize_list(doc.getiterator(tag='room')),
            ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.getiterator()), all_tags)
        self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
        self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)

    def test_copy(self):
        # Element iterators cannot be copied.
        a = ET.Element('a')
        it = a.iter()
        with self.assertRaises(TypeError):
            copy.copy(it)

    def test_pickle(self):
        # Element iterators cannot be pickled with any protocol.
        a = ET.Element('a')
        it = a.iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(it, proto)
2887
Eli Bendersky64d11e62012-06-15 07:42:50 +03002888
Florent Xicluna75b5e7e2012-03-05 10:42:19 +01002889class TreeBuilderTest(unittest.TestCase):
# Document with a DOCTYPE declaration followed by a small <html> tree; fed
# to the parsers under test via self.sample1.
sample1 = ('<!DOCTYPE html PUBLIC'
           ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
           ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
           '<html>text<div>subtext</div>tail</html>')

# Minimal document: a single element that contains only text.
sample2 = '''<toplevel>sometext</toplevel>'''
2896
def _check_sample1_element(self, e):
    # Verify that *e* is the tree described by sample1: an attribute-less
    # <html> root with text 'text' and a single <div> child whose text is
    # 'subtext' and whose tail is 'tail'.
    root_expectations = (
        ('tag', 'html'),
        ('text', 'text'),
        ('tail', None),
        ('attrib', {}),
    )
    for attr_name, expected in root_expectations:
        self.assertEqual(getattr(e, attr_name), expected)

    kids = list(e)
    self.assertEqual(len(kids), 1)

    div = kids[0]
    div_expectations = (
        ('tag', 'div'),
        ('text', 'subtext'),
        ('tail', 'tail'),
        ('attrib', {}),
    )
    for attr_name, expected in div_expectations:
        self.assertEqual(getattr(div, attr_name), expected)
2909
Florent Xicluna75b5e7e2012-03-05 10:42:19 +01002910 def test_dummy_builder(self):
2911 class BaseDummyBuilder:
2912 def close(self):
2913 return 42
2914
2915 class DummyBuilder(BaseDummyBuilder):
2916 data = start = end = lambda *a: None
2917
2918 parser = ET.XMLParser(target=DummyBuilder())
2919 parser.feed(self.sample1)
2920 self.assertEqual(parser.close(), 42)
2921
2922 parser = ET.XMLParser(target=BaseDummyBuilder())
2923 parser.feed(self.sample1)
2924 self.assertEqual(parser.close(), 42)
2925
2926 parser = ET.XMLParser(target=object())
2927 parser.feed(self.sample1)
2928 self.assertIsNone(parser.close())
2929
Stefan Behnel43851a22019-05-01 21:20:38 +02002930 def test_treebuilder_comment(self):
2931 b = ET.TreeBuilder()
2932 self.assertEqual(b.comment('ctext').tag, ET.Comment)
2933 self.assertEqual(b.comment('ctext').text, 'ctext')
2934
2935 b = ET.TreeBuilder(comment_factory=ET.Comment)
2936 self.assertEqual(b.comment('ctext').tag, ET.Comment)
2937 self.assertEqual(b.comment('ctext').text, 'ctext')
2938
2939 b = ET.TreeBuilder(comment_factory=len)
2940 self.assertEqual(b.comment('ctext'), len('ctext'))
2941
2942 def test_treebuilder_pi(self):
2943 b = ET.TreeBuilder()
2944 self.assertEqual(b.pi('target', None).tag, ET.PI)
2945 self.assertEqual(b.pi('target', None).text, 'target')
2946
2947 b = ET.TreeBuilder(pi_factory=ET.PI)
2948 self.assertEqual(b.pi('target').tag, ET.PI)
2949 self.assertEqual(b.pi('target').text, "target")
2950 self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
2951 self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ")
2952
2953 b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
2954 self.assertEqual(b.pi('target'), (len('target'), None))
2955 self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
2956
Stefan Behnelbb697892019-07-24 20:46:01 +02002957 def test_late_tail(self):
2958 # Issue #37399: The tail of an ignored comment could overwrite the text before it.
2959 class TreeBuilderSubclass(ET.TreeBuilder):
2960 pass
2961
2962 xml = "<a>text<!-- comment -->tail</a>"
2963 a = ET.fromstring(xml)
2964 self.assertEqual(a.text, "texttail")
2965
2966 parser = ET.XMLParser(target=TreeBuilderSubclass())
2967 parser.feed(xml)
2968 a = parser.close()
2969 self.assertEqual(a.text, "texttail")
2970
2971 xml = "<a>text<?pi data?>tail</a>"
2972 a = ET.fromstring(xml)
2973 self.assertEqual(a.text, "texttail")
2974
2975 xml = "<a>text<?pi data?>tail</a>"
2976 parser = ET.XMLParser(target=TreeBuilderSubclass())
2977 parser.feed(xml)
2978 a = parser.close()
2979 self.assertEqual(a.text, "texttail")
2980
2981 def test_late_tail_mix_pi_comments(self):
2982 # Issue #37399: The tail of an ignored comment could overwrite the text before it.
2983 # Test appending tails to comments/pis.
2984 class TreeBuilderSubclass(ET.TreeBuilder):
2985 pass
2986
2987 xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
2988 parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
2989 parser.feed(xml)
2990 a = parser.close()
2991 self.assertEqual(a[0].text, ' comment ')
2992 self.assertEqual(a[0].tail, '\ntail')
2993 self.assertEqual(a.text, "text ")
2994
2995 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
2996 parser.feed(xml)
2997 a = parser.close()
2998 self.assertEqual(a[0].text, ' comment ')
2999 self.assertEqual(a[0].tail, '\ntail')
3000 self.assertEqual(a.text, "text ")
3001
3002 xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
3003 parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
3004 parser.feed(xml)
3005 a = parser.close()
3006 self.assertEqual(a[0].text, 'pi data')
3007 self.assertEqual(a[0].tail, 'tail')
3008 self.assertEqual(a.text, "text\n")
3009
3010 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
3011 parser.feed(xml)
3012 a = parser.close()
3013 self.assertEqual(a[0].text, 'pi data')
3014 self.assertEqual(a[0].tail, 'tail')
3015 self.assertEqual(a.text, "text\n")
3016
Eli Bendersky08231a92013-05-18 15:47:16 -07003017 def test_treebuilder_elementfactory_none(self):
3018 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
3019 parser.feed(self.sample1)
3020 e = parser.close()
3021 self._check_sample1_element(e)
3022
Eli Bendersky58d548d2012-05-29 15:45:16 +03003023 def test_subclass(self):
3024 class MyTreeBuilder(ET.TreeBuilder):
3025 def foobar(self, x):
3026 return x * 2
3027
3028 tb = MyTreeBuilder()
3029 self.assertEqual(tb.foobar(10), 20)
3030
3031 parser = ET.XMLParser(target=tb)
3032 parser.feed(self.sample1)
3033
3034 e = parser.close()
Antoine Pitrouee329312012-10-04 19:53:29 +02003035 self._check_sample1_element(e)
Eli Bendersky58d548d2012-05-29 15:45:16 +03003036
Stefan Behnel43851a22019-05-01 21:20:38 +02003037 def test_subclass_comment_pi(self):
3038 class MyTreeBuilder(ET.TreeBuilder):
3039 def foobar(self, x):
3040 return x * 2
3041
3042 tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
3043 self.assertEqual(tb.foobar(10), 20)
3044
3045 parser = ET.XMLParser(target=tb)
3046 parser.feed(self.sample1)
3047 parser.feed('<!-- a comment--><?and a pi?>')
3048
3049 e = parser.close()
3050 self._check_sample1_element(e)
3051
Eli Bendersky2b711402012-03-16 15:29:50 +02003052 def test_element_factory(self):
Eli Bendersky48d358b2012-05-30 17:57:50 +03003053 lst = []
3054 def myfactory(tag, attrib):
3055 nonlocal lst
3056 lst.append(tag)
3057 return ET.Element(tag, attrib)
3058
3059 tb = ET.TreeBuilder(element_factory=myfactory)
3060 parser = ET.XMLParser(target=tb)
3061 parser.feed(self.sample2)
3062 parser.close()
3063
3064 self.assertEqual(lst, ['toplevel'])
Florent Xicluna75b5e7e2012-03-05 10:42:19 +01003065
Antoine Pitrouee329312012-10-04 19:53:29 +02003066 def _check_element_factory_class(self, cls):
3067 tb = ET.TreeBuilder(element_factory=cls)
3068
3069 parser = ET.XMLParser(target=tb)
3070 parser.feed(self.sample1)
3071 e = parser.close()
3072 self.assertIsInstance(e, cls)
3073 self._check_sample1_element(e)
3074
3075 def test_element_factory_subclass(self):
3076 class MyElement(ET.Element):
3077 pass
3078 self._check_element_factory_class(MyElement)
3079
3080 def test_element_factory_pure_python_subclass(self):
3081 # Mimick SimpleTAL's behaviour (issue #16089): both versions of
3082 # TreeBuilder should be able to cope with a subclass of the
3083 # pure Python Element class.
Eli Bendersky46955b22013-05-19 09:20:50 -07003084 base = ET._Element_Py
Antoine Pitrouee329312012-10-04 19:53:29 +02003085 # Not from a C extension
3086 self.assertEqual(base.__module__, 'xml.etree.ElementTree')
3087 # Force some multiple inheritance with a C class to make things
3088 # more interesting.
3089 class MyElement(base, ValueError):
3090 pass
3091 self._check_element_factory_class(MyElement)
3092
Florent Xicluna75b5e7e2012-03-05 10:42:19 +01003093 def test_doctype(self):
3094 class DoctypeParser:
3095 _doctype = None
3096
3097 def doctype(self, name, pubid, system):
3098 self._doctype = (name, pubid, system)
3099
3100 def close(self):
3101 return self._doctype
3102
3103 parser = ET.XMLParser(target=DoctypeParser())
3104 parser.feed(self.sample1)
3105
3106 self.assertEqual(parser.close(),
3107 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
3108 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
3109
scoderc8d8e152017-09-14 22:00:03 +02003110 def test_builder_lookup_errors(self):
3111 class RaisingBuilder:
3112 def __init__(self, raise_in=None, what=ValueError):
3113 self.raise_in = raise_in
3114 self.what = what
3115
3116 def __getattr__(self, name):
3117 if name == self.raise_in:
3118 raise self.what(self.raise_in)
3119 def handle(*args):
3120 pass
3121 return handle
3122
3123 ET.XMLParser(target=RaisingBuilder())
3124 # cET also checks for 'close' and 'doctype', PyET does it only at need
3125 for event in ('start', 'data', 'end', 'comment', 'pi'):
3126 with self.assertRaisesRegex(ValueError, event):
3127 ET.XMLParser(target=RaisingBuilder(event))
3128
3129 ET.XMLParser(target=RaisingBuilder(what=AttributeError))
3130 for event in ('start', 'data', 'end', 'comment', 'pi'):
3131 parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
3132 parser.feed(self.sample1)
3133 self.assertIsNone(parser.close())
3134
Eli Bendersky175fada2012-06-15 08:37:08 +03003135
class XMLParserTest(unittest.TestCase):
    """Tests for ET.XMLParser: constructor arguments, subclassing,
    doctype handling and parsing of str input."""

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
               b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
               b'<html>text</html>')
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
               '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        """Assert that *e* is the ``<file><line>22</line></file>`` tree."""
        self.assertEqual(e.tag, 'file')
        first_child = e[0]
        self.assertEqual(first_child.tag, 'line')
        self.assertEqual(first_child.text, '22')

    def test_constructor_args(self):
        """XMLParser accepts ``encoding`` and ``target`` keyword arguments."""
        builder = ET.TreeBuilder()
        parser = ET.XMLParser(encoding='utf-8', target=builder)
        parser.feed(self.sample1)
        self._check_sample_element(parser.close())

    def test_subclass(self):
        """A trivial XMLParser subclass parses like the base class."""
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        """Parsing a document with a DOCTYPE emits no DeprecationWarning."""
        with warnings.catch_warnings():
            # Turn the warning into an error so that it fails the test.
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        """Check doctype() handling on an XMLParser subclass.

        A doctype() method defined on the parser subclass must trigger a
        RuntimeWarning and never be called; a doctype() method on the
        target must receive the declaration without any warning.
        """
        expected_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        parser_doctype = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                nonlocal parser_doctype
                parser_doctype = (args, kwargs)

        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(parser_doctype)

        parser_doctype = target_doctype = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal target_doctype
                    target_doctype = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(parser_doctype)
            self.assertEqual(target_doctype, expected_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            for category in (DeprecationWarning, RuntimeWarning):
                warnings.simplefilter('error', category)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        """Feeding a str document keeps its non-ASCII characters intact."""
        expected_value = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        root = parser.close()
        self.assertEqual(root.tag, 'money')
        self.assertEqual(root.attrib['value'], expected_value)
        self.assertEqual(root.text, expected_value)
3218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219
class NamespaceParseTest(unittest.TestCase):
    """Tests for findall() with namespace-qualified paths."""

    def test_find_with_namespace(self):
        """findall() matches ``{uri}tag`` paths when given a namespace map."""
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        expected_counts = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expected_counts:
            self.assertEqual(len(doc.findall(path, nsmap)), count)
3228
3229
class ElementSlicingTest(unittest.TestCase):
    """Tests for indexing, slicing, slice assignment and slice deletion
    on the children of an Element."""

    def _elem_tags(self, elemlist):
        """Return the list of tags of the elements in *elemlist*."""
        return [elem.tag for elem in elemlist]

    def _subelem_tags(self, elem):
        """Return the list of tags of the direct children of *elem*."""
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Create an Element with a tag 'a', with the given amount of children
        named 'a0', 'a1' ... and so on.

        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a%s' % index)
        return parent

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        # Out-of-range indexes, positive and negative.
        with self.assertRaises(IndexError):
            e[12]
        with self.assertRaises(IndexError):
            e[-12]

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)
        tags = self._elem_tags

        self.assertEqual(tags(e[3:]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:6]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:16]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(e[3:5]), ['a3', 'a4'])
        self.assertEqual(tags(e[3:-1]), ['a3', 'a4'])
        self.assertEqual(tags(e[:2]), ['a0', 'a1'])

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)
        tags = self._elem_tags

        self.assertEqual(tags(e[8:10:1]), ['a8', 'a9'])
        self.assertEqual(tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
        self.assertEqual(tags(e[::8]), ['a0', 'a8'])
        self.assertEqual(tags(e[1::8]), ['a1', 'a9'])
        # Steps at and beyond sys.maxsize select only the start element.
        self.assertEqual(tags(e[3::sys.maxsize]), ['a3'])
        self.assertEqual(tags(e[3::sys.maxsize<<64]), ['a3'])

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        tags = self._elem_tags

        self.assertEqual(tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
        self.assertEqual(tags(e[::-2]), ['a3', 'a1'])
        self.assertEqual(tags(e[3::-sys.maxsize]), ['a3'])
        self.assertEqual(tags(e[3::-sys.maxsize-1]), ['a3'])
        self.assertEqual(tags(e[3::-sys.maxsize<<64]), ['a3'])

    def test_delslice(self):
        # (number of children, slice to delete, tags expected to remain)
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for numchildren, section, remaining in cases:
            e = self._make_elem_with_children(numchildren)
            del e[section]
            self.assertEqual(self._subelem_tags(e), remaining)

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

        # Out-of-range assignments must fail and leave the children untouched.
        with self.assertRaises(IndexError):
            e[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            e[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        def numbered(count):
            # Fresh elements tagged 'b0', 'b1', ...
            return [ET.Element('b%s' % i) for i in range(count)]

        e = self._make_elem_with_children(4)
        e[1:3] = numbered(2)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = numbered(3)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        def numbered(count):
            # Fresh elements tagged 'b0', 'b1', ...
            return [ET.Element('b%s' % i) for i in range(count)]

        e = self._make_elem_with_children(6)
        e[1:5:2] = numbered(2)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts a replacement of matching length.
        e = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[1:5:2] = numbered(3)
        with self.assertRaises(ValueError):
            e[1:5:2] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        e = self._make_elem_with_children(4)
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::sys.maxsize<<64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        def numbered(count):
            # Fresh elements tagged 'b0', 'b1', ...
            return [ET.Element('b%s' % i) for i in range(count)]

        e = self._make_elem_with_children(4)
        e[2:0:-1] = numbered(2)
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        # An extended slice only accepts a replacement of matching length.
        e = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[2:0:-1] = numbered(3)
        with self.assertRaises(ValueError):
            e[2:0:-1] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        e = self._make_elem_with_children(4)
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize-1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-sys.maxsize<<64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
3378
Eli Benderskyf996e772012-03-16 05:53:30 +02003379
class IOTest(unittest.TestCase):
    """Tests for serializing trees to, and parsing them from, files,
    in-memory streams and minimal user-defined reader/writer objects."""

    def test_encoding(self):
        """Serialize text and attribute values under several encodings."""
        # Test encoding issues.
        # Encodings for which the expected output starts with an XML
        # declaration naming the encoding.
        declared = ("iso-8859-1", "utf-16", "utf-32")
        prolog = "<?xml version='1.0' encoding='%s'?>\n"

        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                for name in (enc, enc.upper()):
                    self.assertEqual(serialize(elem, encoding=name),
                                     b'<tag>abc</tag>')
        for enc in declared:
            with self.subTest(enc):
                # The declaration echoes the name as given; the bytes are
                # the same whatever the case of the name.
                for name in (enc, enc.upper()):
                    self.assertEqual(
                        serialize(elem, encoding=name),
                        (prolog % name + "<tag>abc</tag>").encode(enc))

        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("utf-8", "us-ascii"):
            self.assertEqual(serialize(elem, encoding=enc),
                             b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in declared:
            self.assertEqual(
                serialize(elem, encoding=enc),
                (prolog % enc + "<tag>&lt;&amp;\"'&gt;</tag>").encode(enc))

        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("utf-8", "us-ascii"):
            self.assertEqual(serialize(elem, encoding=enc),
                             b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in declared:
            self.assertEqual(
                serialize(elem, encoding=enc),
                (prolog % enc
                 + "<tag key=\"&lt;&amp;&quot;'&gt;\" />").encode(enc))

        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in declared:
            self.assertEqual(
                serialize(elem, encoding=enc),
                (prolog % enc + "<tag>åöö&lt;&gt;</tag>").encode(enc))

        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                         b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                         b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            self.assertEqual(
                serialize(elem, encoding=enc),
                (prolog % enc + "<tag key=\"åöö&lt;&gt;\" />").encode(enc))

    def test_write_to_filename(self):
        """ElementTree.write() accepts a file name."""
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as written:
            self.assertEqual(written.read(), b'''<site />''')

    def test_write_to_text_file(self):
        """ElementTree.write() to a text file leaves the file open."""
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as out:
            tree.write(out, encoding='unicode')
            self.assertFalse(out.closed)
        with open(TESTFN, 'rb') as written:
            self.assertEqual(written.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        """ElementTree.write() to a binary file leaves the file open."""
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as out:
            tree.write(out)
            self.assertFalse(out.closed)
        with open(TESTFN, 'rb') as written:
            self.assertEqual(written.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        """UTF-16 output to buffered and unbuffered binary files is identical."""
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        expected = ("<?xml version='1.0' encoding='utf-16'?>\n"
                    "<site />").encode("utf-16")
        # test BOM writing to buffered file, then to non-buffered file
        for open_options in ({}, {'buffering': 0}):
            with open(TESTFN, 'wb', **open_options) as out:
                tree.write(out, encoding='utf-16')
                self.assertFalse(out.closed)
            with open(TESTFN, 'rb') as written:
                self.assertEqual(written.read(), expected)

    def test_read_from_stringio(self):
        source = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree = ET.ElementTree()
        tree.parse(source)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        sink = io.StringIO()
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(sink, encoding='unicode')
        self.assertEqual(sink.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        source = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree = ET.ElementTree()
        tree.parse(source)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        sink = io.BytesIO()
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(sink)
        self.assertEqual(sink.getvalue(), b'''<site />''')

    # Empty attribute holder: tests attach just the methods (read, write,
    # seekable, tell) they want a user-defined reader/writer to expose.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        backing = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = backing.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        backing = io.StringIO()
        writer = self.dummy()
        writer.write = backing.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(backing.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        backing = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = backing.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')
        # NOTE(review): this fresh tree is never used afterwards; kept so
        # the statement sequence matches the original test.
        tree = ET.ElementTree()

    def test_write_to_user_binary_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        backing = io.BytesIO()
        writer = self.dummy()
        writer.write = backing.write
        tree.write(writer)
        self.assertEqual(backing.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        backing = io.BytesIO()
        writer = self.dummy()
        writer.write = backing.write
        writer.seekable = lambda: True
        writer.tell = backing.tell
        tree.write(writer, encoding="utf-16")
        expected = ("<?xml version='1.0' encoding='utf-16'?>\n"
                    "<site />").encode("utf-16")
        self.assertEqual(backing.getvalue(), expected)

    def test_tostringlist_invariant(self):
        """Joining tostringlist() output equals tostring() output."""
        root = ET.fromstring('<tag>foo</tag>')
        for encoding, empty in (('unicode', ''), ('utf-16', b'')):
            self.assertEqual(
                ET.tostring(root, encoding),
                empty.join(ET.tostringlist(root, encoding)))

    def test_short_empty_elements(self):
        """Check the ``short_empty_elements`` option of tostring()."""
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        short_form = '<tag>a<x />b<y />c</tag>'
        long_form = '<tag>a<x></x>b<y></y>c</tag>'
        self.assertEqual(ET.tostring(root, 'unicode'), short_form)
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            short_form)
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            long_form)
3587
Eli Benderskyf996e772012-03-16 05:53:30 +02003588
class ParseErrorTest(unittest.TestCase):
    """Tests for the ET.ParseError exception raised on malformed XML."""

    def test_subclass(self):
        """ParseError is a kind of SyntaxError."""
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse *s* with ET.fromstring() and return the ET.ParseError raised.

        If parsing does not raise ET.ParseError the test fails right here,
        instead of returning None and letting the caller trip over an
        unrelated AttributeError on ``.position`` or ``.code``.
        """
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        """ParseError.position is a (line, column) pair locating the error."""
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        """ParseError.code carries the expat error code."""
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                         ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3608
3609
class KeywordArgsTest(unittest.TestCase):
    # Test various issues with keyword arguments passed to ET.Element
    # constructor and methods
    def test_issue14818(self):
        """Positional and keyword spellings of the same call must agree."""
        root = ET.XML("<a>foo</a>")

        # find-style methods: positional vs. keyword arguments.
        self.assertEqual(root.find('a', None),
                         root.find(path='a', namespaces=None))
        self.assertEqual(root.findtext('a', None, None),
                         root.findtext(path='a', default=None, namespaces=None))
        self.assertEqual(root.findall('a', None),
                         root.findall(path='a', namespaces=None))
        self.assertEqual(list(root.iterfind('a', None)),
                         list(root.iterfind(path='a', namespaces=None)))

        # Element constructor: every way of passing the same attributes.
        self.assertEqual(ET.Element('a').attrib, {})
        expected_attrib = dict(href="#", id="foo")
        elements = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        for element in elements:
            self.assertEqual(element.tag, 'a')
            self.assertEqual(element.attrib, expected_attrib)

        child = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally or by keyword.
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', "I'm not a dict")
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib="I'm not a dict")
3643
Eli Bendersky64d11e62012-06-15 07:42:50 +03003644# --------------------------------------------------------------------
3645
class NoAcceleratorTest(unittest.TestCase):
    """Checks that pyET really is the pure Python implementation."""

    def setUp(self):
        if not pyET:
            self.skipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        for cls in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(cls.__init__, types.FunctionType)
Eli Bendersky64d11e62012-06-15 07:42:50 +03003658
Stefan Behnele1d5dd62019-05-01 22:34:13 +02003659
3660# --------------------------------------------------------------------
3661
def c14n_roundtrip(xml, **options):
    """Return ``pyET.canonicalize(xml, **options)``.

    *xml* is the XML text to canonicalize; *options* are passed through
    unchanged as keyword arguments.
    """
    canonical = pyET.canonicalize(xml, **options)
    return canonical
3664
3665
class C14NTest(unittest.TestCase):
    """Tests for canonicalize(): simple round trips, tag/attribute
    exclusion, and the data files under xmltestdata/c14n-20."""

    # Always show complete diffs; the compared documents can be long.
    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Canonicalize small inline documents and compare against the
        # exact expected serialization.

        # Basics
        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"),  # FIXME
                         '<doc xmlns="uri"></doc>')
        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
                         '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
                         '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
                         '<elem></elem>')

        # C14N spec
        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
                         '<doc>Hello, world!</doc>')
        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
                         '<value>2</value>')
        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
                         '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
                         '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
        # The runs of spaces in the next three cases are significant: every
        # literal space in the attribute value, plus the one produced by the
        # &#x20; reference, must show up in the expected output (three
        # literal spaces + &#x20; on the input side == four spaces on the
        # output side).
        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
                         '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

    def test_c14n_exclusion(self):
        # Exercise the exclude_tags / exclude_attrs options, with and
        # without strip_text.
        #
        # The nesting indentation of this document (four spaces per level
        # after dedent) is significant for the assertions that do not pass
        # strip_text=True: the whitespace-only text around excluded elements
        # is expected to appear in the output.
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
                           exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
            '<root>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
            '<root>\n'
            '    \n'
            '    \n'
            '    <c>\n'
            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
            '    </c>\n'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>\n'
            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
            '        \n'
            '    </a>\n'
            '    \n'
            '    <c>\n'
            '        \n'
            '    </c>\n'
            '</root>')

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        # Data-driven test.  File names in the data directory encode the
        # cases: "in*.xml" are inputs, "c14n*.xml" are option sets, and
        # "out_<input>_<config>.xml" is the expected result of
        # canonicalizing <input> with the options from <config>.
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (".xml" stripped) of all XML files, in sorted order.
        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
                 if filename.endswith('.xml')]
        input_files = [
            filename for filename in files
            if filename.startswith('in')
        ]
        # Maps config name -> {option local name: (stripped text, element)}.
        configs = {
            filename: {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in ET.parse(full_path(filename) + ".xml").getroot()
            }
            for filename in files
            if filename.startswith('c14n')
        }

        # Maps input name -> [(output file name, config dict), ...].
        tests = {
            input_file: [
                (filename, configs[filename.rsplit('_', 1)[-1]])
                for filename in files
                if filename.startswith(f'out_{input_file}_')
                and filename.rsplit('_', 1)[-1] in configs
            ]
            for input_file in input_files
        }

        # Make sure we found all test cases.
        self.assertEqual(30, sum(
            len(output_files) for output_files in tests.values()))

        def get_option(config, option_name, default=None):
            # Text value of an option, or *default* if it is not configured.
            return config.get(option_name, (default, ()))[0]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'
                if 'QNameAware' in config:
                    qattrs = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
                    ]
                    qtags = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}Element')
                    ]
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    # Both of these inputs are skipped with the same message
                    # unless prefixes are rewritten.
                    if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                            and not rewrite_prefixes):
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if 'QNameAware' in config and config['QNameAware'][1].find(
                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    # Either a path or, for inC14N5, an in-memory file.
                    source = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        with open(full_path('world.txt'), 'rb') as entity_file:
                            with open(source, 'rb') as xml_file:
                                source = io.BytesIO(xml_file.read().replace(
                                    b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r',
                              encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
3883
# --------------------------------------------------------------------
3885
3886
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    A fresh copy of 'xml.etree.ElementTree' is imported with
    '_elementtree' blocked and stored in the global ``pyET``.  When
    *module* is None that copy is also used as the global ``ET``;
    otherwise ``ET`` is the given module.  The namespace map, the
    ElementPath cache and (where available) the Comment/PI factories are
    saved before the run and restored afterwards, and both globals are
    reset to None at the end.
    """
    global ET, pyET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    suite_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        KeywordArgsTest,
        C14NTest,
    ]

    # NoAcceleratorTest is added only when the module under test is not the
    # freshly imported pyET.  skipUnless cannot be used for this, because
    # pyET is filled in only after this module has been loaded.
    if ET is not pyET:
        suite_classes.append(NoAcceleratorTest)

    # Snapshot the default namespace mapping and swap in a copy of the
    # path cache (which should be empty at this point).
    from xml.etree import ElementPath
    namespace_map = ET.register_namespace._namespace_map
    saved_namespaces = namespace_map.copy()
    original_cache = ElementPath._cache
    ElementPath._cache = original_cache.copy()
    # Align the Comment/PI factories, remembering the previous ones.
    saved_factories = (ET._set_factories(ET.Comment, ET.PI)
                       if hasattr(ET, '_set_factories') else None)

    try:
        support.run_unittest(*suite_classes)
    finally:
        from xml.etree import ElementPath
        # Put the namespace mapping and the path cache back.
        namespace_map.clear()
        namespace_map.update(saved_namespaces)
        ElementPath._cache = original_cache
        if saved_factories is not None:
            ET._set_factories(*saved_factories)
        # don't interfere with subsequent tests
        ET = pyET = None
3954
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003955
# Script entry point: with no module argument, test_main() runs the
# tests against the pure-Python implementation.
if __name__ == "__main__":
    test_main()