blob: cfc674be5d141dfde5b425e54cb6a59780ae0ff3 [file] [log] [blame]
Antoine Pitroud72402e2010-10-27 18:52:48 +00001# regression test for SAX 2.0
Lars Gustäbel96753b32000-09-24 12:24:24 +00002# $Id$
3
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004from xml.sax import make_parser, ContentHandler, \
5 SAXException, SAXReaderNotAvailable, SAXParseException
R David Murraya846f5a2013-03-18 00:18:12 -04006import unittest
Victor Stinneref9c0e72017-05-05 09:46:47 +02007from unittest import mock
try:
    make_parser()
except SAXReaderNotAvailable:
    # Nothing in this module can run without a working SAX parser, so
    # skip the module at import time instead of failing every test.
    raise unittest.SkipTest("no XML parsers available")
Thomas Wouters0e3f5912006-08-11 14:57:12 +000013from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
Serhiy Storchakaaa9563c2015-04-02 20:55:59 +030014 XMLFilterBase, prepare_input_source
Thomas Wouters0e3f5912006-08-11 14:57:12 +000015from xml.sax.expatreader import create_parser
Christian Heimes17b1d5d2018-09-23 09:50:25 +020016from xml.sax.handler import feature_namespaces, feature_external_ges
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
Serhiy Storchaka88efc522013-02-10 14:29:52 +020018from io import BytesIO, StringIO
Georg Brandlc502df42013-05-12 11:41:12 +020019import codecs
Serhiy Storchaka1a4ed4c2013-02-02 12:17:05 +020020import os.path
Serhiy Storchakad5202392013-02-02 10:31:17 +020021import shutil
Hai Shi24bddc12020-05-28 22:24:39 +080022import sys
Christian Heimes17b1d5d2018-09-23 09:50:25 +020023from urllib.error import URLError
Victor Stinner7cb92042019-07-02 14:50:19 +020024import urllib.request
Hai Shibb0424b2020-08-04 00:47:42 +080025from test.support import os_helper
26from test.support import findfile, run_unittest
27from test.support.os_helper import FakePath, TESTFN
28
Florent Xiclunaf15351d2010-03-13 23:24:31 +000029
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
    # Skip the module when either fixture path cannot be encoded as UTF-8.
    TEST_XMLFILE.encode("utf-8")
    TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")


def _nonascii_filenames_supported():
    """Return True when a non-ASCII test file name can be used."""
    if os.path.supports_unicode_filenames:
        return True
    try:
        os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
    except (UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        return False
    return True


supports_nonascii_filenames = _nonascii_filenames_supported()
requires_nonascii_filenames = unittest.skipUnless(
    supports_nonascii_filenames,
    'Requires non-ascii filenames support')

# Namespace URI shared by the namespace-aware tests in this module.
ns_uri = "http://www.python.org/xml-ns/saxtest/"
Lars Gustäbel96753b32000-09-24 12:24:24 +000051
class XmlTestBase(unittest.TestCase):
    """Shared assertion helpers for SAX attribute containers."""

    def verify_empty_attrs(self, attrs):
        """Assert that *attrs* behaves like an empty, non-namespace attribute set."""
        # Every lookup by name must raise KeyError.
        with self.assertRaises(KeyError):
            attrs.getValue("attr")
        with self.assertRaises(KeyError):
            attrs.getValueByQName("attr")
        with self.assertRaises(KeyError):
            attrs.getNameByQName("attr")
        with self.assertRaises(KeyError):
            attrs.getQNameByName("attr")
        with self.assertRaises(KeyError):
            attrs.__getitem__("attr")

        # Size and listing accessors all report "nothing here".
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn("attr", attrs)

        # Mapping-style access.
        self.assertEqual(list(attrs.keys()), [])
        self.assertEqual(attrs.get("attrs"), None)
        self.assertEqual(attrs.get("attrs", 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_empty_nsattrs(self, attrs):
        """Assert that *attrs* behaves like an empty namespace-aware attribute set."""
        expanded_name = (ns_uri, "attr")
        qualified_name = "ns:attr"

        # Every lookup by expanded or qualified name must raise KeyError.
        with self.assertRaises(KeyError):
            attrs.getValue(expanded_name)
        with self.assertRaises(KeyError):
            attrs.getValueByQName(qualified_name)
        with self.assertRaises(KeyError):
            attrs.getNameByQName(qualified_name)
        with self.assertRaises(KeyError):
            attrs.getQNameByName(expanded_name)
        with self.assertRaises(KeyError):
            attrs.__getitem__(expanded_name)

        # Size and listing accessors all report "nothing here".
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn(expanded_name, attrs)

        # Mapping-style access.
        self.assertEqual(list(attrs.keys()), [])
        self.assertEqual(attrs.get(expanded_name), None)
        self.assertEqual(attrs.get(expanded_name, 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_attrs_wattr(self, attrs):
        """Assert that *attrs* holds exactly one attribute, ``attr="val"``."""
        # Size and listing accessors.
        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), ["attr"])
        self.assertEqual(attrs.getQNames(), ["attr"])
        self.assertEqual(len(attrs), 1)
        self.assertIn("attr", attrs)

        # Mapping-style access.
        self.assertEqual(list(attrs.keys()), ["attr"])
        self.assertEqual(attrs.get("attr"), "val")
        self.assertEqual(attrs.get("attr", 25), "val")
        self.assertEqual(list(attrs.items()), [("attr", "val")])
        self.assertEqual(list(attrs.values()), ["val"])

        # SAX-style lookups by name and by qualified name.
        self.assertEqual(attrs.getValue("attr"), "val")
        self.assertEqual(attrs.getValueByQName("attr"), "val")
        self.assertEqual(attrs.getNameByQName("attr"), "attr")
        self.assertEqual(attrs["attr"], "val")
        self.assertEqual(attrs.getQNameByName("attr"), "attr")
Lars Gustäbel96753b32000-09-24 12:24:24 +0000103
Serhiy Storchaka13e41c52015-04-02 23:05:57 +0300104
def xml_str(doc, encoding=None):
    """Return *doc*, prefixed with an XML declaration when *encoding* is given.

    With ``encoding=None`` the document text is returned unchanged.
    """
    if encoding is not None:
        return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
    return doc
109
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* as bytes encoded with *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*, and ``None`` produces no declaration at all.
    Characters that *encoding* cannot represent become character references.
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    return text.encode(encoding, 'xmlcharrefreplace')
114
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to TESTFN using *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*, and ``None`` produces no declaration at all.
    Characters that *encoding* cannot represent are written as character
    references.
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    with open(TESTFN, 'w', encoding=encoding,
              errors='xmlcharrefreplace') as xml_file:
        xml_file.write(text)
120
121
class ParseTest(unittest.TestCase):
    """Round-trip tests for xml.sax.parse() and xml.sax.parseString().

    Each check feeds one representation of ``data`` to the parser,
    re-serializes the SAX events with XMLGenerator as UTF-8 and compares
    the output with ``xml_str(data, 'utf-8')``.
    """

    # Sample document with ASCII, Latin-1, BMP and non-BMP characters.
    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        # Several tests write their input to TESTFN via make_xml_file().
        os_helper.unlink(TESTFN)

    def check_parse(self, f):
        """Parse source *f* with xml.sax.parse() and verify the regenerated XML."""
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parse_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            # subTest names the failing encoding and lets the rest still run.
            with self.subTest(encoding=encoding):
                self.check_parse(StringIO(xml_str(self.data, encoding)))
                make_xml_file(self.data, encoding)
                with open(TESTFN, 'r', encoding=encoding) as f:
                    self.check_parse(f)
                self.check_parse(StringIO(self.data))
                make_xml_file(self.data, encoding, None)
                with open(TESTFN, 'r', encoding=encoding) as f:
                    self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self.check_parse(BytesIO(xml_bytes(self.data, encoding)))
                make_xml_file(self.data, encoding)
                self.check_parse(TESTFN)
                with open(TESTFN, 'rb') as f:
                    self.check_parse(f)
                self.check_parse(
                    BytesIO(xml_bytes(self.data, encoding, None)))
                make_xml_file(self.data, encoding, None)
                self.check_parse(TESTFN)
                with open(TESTFN, 'rb') as f:
                    self.check_parse(f)
        # accept UTF-8 with BOM
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8')))
        make_xml_file(self.data, 'utf-8-sig', 'utf-8')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None)))
        make_xml_file(self.data, 'utf-8-sig', None)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # accept data with declared encoding
        self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1')))
        make_xml_file(self.data, 'iso-8859-1')
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with self.assertRaises(SAXException):
            self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_path_object(self):
        make_xml_file(self.data, 'utf-8', None)
        self.check_parse(FakePath(TESTFN))

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            # Named "source" rather than "input" to avoid shadowing the builtin.
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def test_parse_close_source(self):
        builtin_open = open
        fileobj = None

        def mock_open(*args, **kwargs):
            # Remember the file object the code under test opened so its
            # state can be inspected after parsing fails.
            nonlocal fileobj
            fileobj = builtin_open(*args, **kwargs)
            return fileobj

        with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
            make_xml_file(self.data, 'iso-8859-1', None)
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            # Fail with a clear message if the patched open() was never used.
            self.assertIsNotNone(fileobj)
            self.assertTrue(fileobj.closed)

    def check_parseString(self, s):
        """Parse *s* with xml.sax.parseString() and verify the regenerated XML."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self.check_parseString(xml_str(self.data, encoding))
        self.check_parseString(self.data)

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self.check_parseString(xml_bytes(self.data, encoding))
                self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
245
class MakeParserTest(unittest.TestCase):
    def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        for _ in range(6):
            from xml.sax import make_parser
            parser = make_parser()

    def test_make_parser3(self):
        # Testing that make_parser can handle different types of
        # iterables.
        parser_lists = (
            ['module'],
            ('module', ),
            {'module'},
            frozenset({'module'}),
            {'module': None},
            iter(['module']),
        )
        for parser_list in parser_lists:
            make_parser(parser_list)

    def test_make_parser4(self):
        # Testing that make_parser can handle empty iterables.
        parser_lists = (
            [],
            tuple(),
            set(),
            frozenset(),
            {},
            iter([]),
        )
        for parser_list in parser_lists:
            make_parser(parser_list)

    def test_make_parser5(self):
        # Testing that make_parser can handle iterables with more than
        # one item.
        parser_lists = (
            ['module1', 'module2'],
            ('module1', 'module2'),
            {'module1', 'module2'},
            frozenset({'module1', 'module2'}),
            {'module1': None, 'module2': None},
            iter(['module1', 'module2']),
        )
        for parser_list in parser_lists:
            make_parser(parser_list)
Tim Petersd2bf3b72001-01-18 02:22:22 +0000292
Lars Gustäbel96753b32000-09-24 12:24:24 +0000293# ===========================================================================
294#
295# saxutils tests
296#
297# ===========================================================================
298
class SaxutilsTest(unittest.TestCase):
    # ===== escape
    def test_escape_basic(self):
        escaped = escape("Donald Duck & Co")
        self.assertEqual(escaped, "Donald Duck &amp; Co")

    def test_escape_all(self):
        escaped = escape("<Donald Duck & Co>")
        self.assertEqual(escaped, "&lt;Donald Duck &amp; Co&gt;")

    def test_escape_extra(self):
        # Extra entities are given as a {character: replacement} mapping.
        escaped = escape("Hei på deg", {"å" : "&aring;"})
        self.assertEqual(escaped, "Hei p&aring; deg")

    # ===== unescape
    def test_unescape_basic(self):
        unescaped = unescape("Donald Duck &amp; Co")
        self.assertEqual(unescaped, "Donald Duck & Co")

    def test_unescape_all(self):
        unescaped = unescape("&lt;Donald Duck &amp; Co&gt;")
        self.assertEqual(unescaped, "<Donald Duck & Co>")

    def test_unescape_extra(self):
        unescaped = unescape("Hei på deg", {"å" : "&aring;"})
        self.assertEqual(unescaped, "Hei p&aring; deg")

    def test_unescape_amp_extra(self):
        # The text produced by unescaping "&amp;" must not be matched
        # against the extra entities afterwards.
        unescaped = unescape("&amp;foo;", {"&foo;": "splat"})
        self.assertEqual(unescaped, "&foo;")

    # ===== quoteattr
    def test_quoteattr_basic(self):
        quoted = quoteattr("Donald Duck & Co")
        self.assertEqual(quoted, '"Donald Duck &amp; Co"')

    def test_single_quoteattr(self):
        quoted = quoteattr('Includes "double" quotes')
        self.assertEqual(quoted, '\'Includes "double" quotes\'')

    def test_double_quoteattr(self):
        quoted = quoteattr("Includes 'single' quotes")
        self.assertEqual(quoted, "\"Includes 'single' quotes\"")

    def test_single_double_quoteattr(self):
        quoted = quoteattr("Includes 'single' and \"double\" quotes")
        self.assertEqual(
            quoted,
            "\"Includes 'single' and &quot;double&quot; quotes\"")

    # ===== make_parser
    def test_make_parser(self):
        # Creating a parser should succeed - it should fall back
        # to the expatreader
        parser = make_parser(['xml.parsers.no_such_parser'])
Martin v. Löwis962c9e72000-10-06 17:41:52 +0000349
350
class PrepareInputSourceTest(unittest.TestCase):
    """Tests for how prepare_input_source() picks the stream to read from."""

    def setUp(self):
        self.file = os_helper.TESTFN
        # The tests compare against exact ASCII bytes, so write the file
        # with an explicit encoding instead of the locale default.
        with open(self.file, "w", encoding="ascii") as tmp:
            tmp.write("This was read from a file.")

    def tearDown(self):
        os_helper.unlink(self.file)

    def make_byte_stream(self):
        """Return a fresh in-memory binary stream with known content."""
        return BytesIO(b"This is a byte stream.")

    def make_character_stream(self):
        """Return a fresh in-memory text stream with known content."""
        return StringIO("This is a character stream.")

    def checkContent(self, stream, content):
        """Assert that *stream* exists and yields *content*, then close it."""
        self.assertIsNotNone(stream)
        try:
            self.assertEqual(stream.read(), content)
        finally:
            # Close the stream even when the comparison fails, so a failing
            # test does not also leave an open file behind.
            stream.close()

    def test_character_stream(self):
        # If the source is an InputSource with a character stream, use it.
        src = InputSource(self.file)
        src.setCharacterStream(self.make_character_stream())
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getByteStream())
        self.checkContent(prep.getCharacterStream(),
                          "This is a character stream.")

    def test_byte_stream(self):
        # If the source is an InputSource that does not have a character
        # stream but does have a byte stream, use the byte stream.
        src = InputSource(self.file)
        src.setByteStream(self.make_byte_stream())
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")

    def test_system_id(self):
        # If the source is an InputSource that has neither a character
        # stream nor a byte stream, open the system ID.
        src = InputSource(self.file)
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_string(self):
        # If the source is a string, use it as a system ID and open it.
        prep = prepare_input_source(self.file)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_path_objects(self):
        # If the source is a Path object, use it as a system ID and open it.
        prep = prepare_input_source(FakePath(self.file))
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_binary_file(self):
        # If the source is a binary file-like object, use it as a byte
        # stream.
        prep = prepare_input_source(self.make_byte_stream())
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")

    def test_text_file(self):
        # If the source is a text file-like object, use it as a character
        # stream.
        prep = prepare_input_source(self.make_character_stream())
        self.assertIsNone(prep.getByteStream())
        self.checkContent(prep.getCharacterStream(),
                          "This is a character stream.")
430
Serhiy Storchakaaa9563c2015-04-02 20:55:59 +0300431
Lars Gustäbel96753b32000-09-24 12:24:24 +0000432# ===== XMLGenerator
433
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200434class XmlgenTest:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000435 def test_xmlgen_basic(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200436 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000437 gen = XMLGenerator(result)
438 gen.startDocument()
439 gen.startElement("doc", {})
440 gen.endElement("doc")
441 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000442
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200443 self.assertEqual(result.getvalue(), self.xml("<doc></doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000444
R. David Murraya90032a2010-10-17 22:46:45 +0000445 def test_xmlgen_basic_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200446 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000447 gen = XMLGenerator(result, short_empty_elements=True)
448 gen.startDocument()
449 gen.startElement("doc", {})
450 gen.endElement("doc")
451 gen.endDocument()
452
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200453 self.assertEqual(result.getvalue(), self.xml("<doc/>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000454
Guido van Rossumd8faa362007-04-27 19:54:29 +0000455 def test_xmlgen_content(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200456 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000457 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000458
Guido van Rossumd8faa362007-04-27 19:54:29 +0000459 gen.startDocument()
460 gen.startElement("doc", {})
461 gen.characters("huhei")
462 gen.endElement("doc")
463 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000464
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200465 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000466
R. David Murraya90032a2010-10-17 22:46:45 +0000467 def test_xmlgen_content_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200468 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000469 gen = XMLGenerator(result, short_empty_elements=True)
470
471 gen.startDocument()
472 gen.startElement("doc", {})
473 gen.characters("huhei")
474 gen.endElement("doc")
475 gen.endDocument()
476
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200477 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000478
Guido van Rossumd8faa362007-04-27 19:54:29 +0000479 def test_xmlgen_pi(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200480 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000481 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000482
Guido van Rossumd8faa362007-04-27 19:54:29 +0000483 gen.startDocument()
484 gen.processingInstruction("test", "data")
485 gen.startElement("doc", {})
486 gen.endElement("doc")
487 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000488
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200489 self.assertEqual(result.getvalue(),
490 self.xml("<?test data?><doc></doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000491
Guido van Rossumd8faa362007-04-27 19:54:29 +0000492 def test_xmlgen_content_escape(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200493 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000494 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000495
Guido van Rossumd8faa362007-04-27 19:54:29 +0000496 gen.startDocument()
497 gen.startElement("doc", {})
498 gen.characters("<huhei&")
499 gen.endElement("doc")
500 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000501
Ezio Melottib3aedd42010-11-20 19:04:17 +0000502 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200503 self.xml("<doc>&lt;huhei&amp;</doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000504
Guido van Rossumd8faa362007-04-27 19:54:29 +0000505 def test_xmlgen_attr_escape(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200506 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000507 gen = XMLGenerator(result)
Fred Drakec9fadf92001-08-07 19:17:06 +0000508
Guido van Rossumd8faa362007-04-27 19:54:29 +0000509 gen.startDocument()
510 gen.startElement("doc", {"a": '"'})
511 gen.startElement("e", {"a": "'"})
512 gen.endElement("e")
513 gen.startElement("e", {"a": "'\""})
514 gen.endElement("e")
515 gen.startElement("e", {"a": "\n\r\t"})
516 gen.endElement("e")
517 gen.endElement("doc")
518 gen.endDocument()
Fred Drakec9fadf92001-08-07 19:17:06 +0000519
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200520 self.assertEqual(result.getvalue(), self.xml(
521 "<doc a='\"'><e a=\"'\"></e>"
522 "<e a=\"'&quot;\"></e>"
523 "<e a=\"&#10;&#13;&#9;\"></e></doc>"))
524
525 def test_xmlgen_encoding(self):
526 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
527 'utf-16', 'utf-16be', 'utf-16le',
528 'utf-32', 'utf-32be', 'utf-32le')
529 for encoding in encodings:
530 result = self.ioclass()
531 gen = XMLGenerator(result, encoding=encoding)
532
533 gen.startDocument()
534 gen.startElement("doc", {"a": '\u20ac'})
535 gen.characters("\u20ac")
536 gen.endElement("doc")
537 gen.endDocument()
538
539 self.assertEqual(result.getvalue(),
540 self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding))
541
542 def test_xmlgen_unencodable(self):
543 result = self.ioclass()
544 gen = XMLGenerator(result, encoding='ascii')
545
546 gen.startDocument()
547 gen.startElement("doc", {"a": '\u20ac'})
548 gen.characters("\u20ac")
549 gen.endElement("doc")
550 gen.endDocument()
551
552 self.assertEqual(result.getvalue(),
553 self.xml('<doc a="&#8364;">&#8364;</doc>', encoding='ascii'))
Fred Drakec9fadf92001-08-07 19:17:06 +0000554
Guido van Rossumd8faa362007-04-27 19:54:29 +0000555 def test_xmlgen_ignorable(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200556 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000557 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000558
Guido van Rossumd8faa362007-04-27 19:54:29 +0000559 gen.startDocument()
560 gen.startElement("doc", {})
561 gen.ignorableWhitespace(" ")
562 gen.endElement("doc")
563 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000564
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200565 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000566
R. David Murraya90032a2010-10-17 22:46:45 +0000567 def test_xmlgen_ignorable_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200568 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000569 gen = XMLGenerator(result, short_empty_elements=True)
570
571 gen.startDocument()
572 gen.startElement("doc", {})
573 gen.ignorableWhitespace(" ")
574 gen.endElement("doc")
575 gen.endDocument()
576
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200577 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000578
Serhiy Storchaka3eab6b32013-05-12 17:31:16 +0300579 def test_xmlgen_encoding_bytes(self):
580 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
581 'utf-16', 'utf-16be', 'utf-16le',
582 'utf-32', 'utf-32be', 'utf-32le')
583 for encoding in encodings:
584 result = self.ioclass()
585 gen = XMLGenerator(result, encoding=encoding)
586
587 gen.startDocument()
588 gen.startElement("doc", {"a": '\u20ac'})
589 gen.characters("\u20ac".encode(encoding))
590 gen.ignorableWhitespace(" ".encode(encoding))
591 gen.endElement("doc")
592 gen.endDocument()
593
594 self.assertEqual(result.getvalue(),
595 self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding))
596
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597 def test_xmlgen_ns(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200598 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000599 gen = XMLGenerator(result)
Lars Gustäbel96753b32000-09-24 12:24:24 +0000600
Guido van Rossumd8faa362007-04-27 19:54:29 +0000601 gen.startDocument()
602 gen.startPrefixMapping("ns1", ns_uri)
603 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
604 # add an unqualified name
605 gen.startElementNS((None, "udoc"), None, {})
606 gen.endElementNS((None, "udoc"), None)
607 gen.endElementNS((ns_uri, "doc"), "ns1:doc")
608 gen.endPrefixMapping("ns1")
609 gen.endDocument()
Fred Drake004d5e62000-10-23 17:22:08 +0000610
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200611 self.assertEqual(result.getvalue(), self.xml(
612 '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
Guido van Rossumd8faa362007-04-27 19:54:29 +0000613 ns_uri))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000614
R. David Murraya90032a2010-10-17 22:46:45 +0000615 def test_xmlgen_ns_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200616 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000617 gen = XMLGenerator(result, short_empty_elements=True)
618
619 gen.startDocument()
620 gen.startPrefixMapping("ns1", ns_uri)
621 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {})
622 # add an unqualified name
623 gen.startElementNS((None, "udoc"), None, {})
624 gen.endElementNS((None, "udoc"), None)
625 gen.endElementNS((ns_uri, "doc"), "ns1:doc")
626 gen.endPrefixMapping("ns1")
627 gen.endDocument()
628
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200629 self.assertEqual(result.getvalue(), self.xml(
630 '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' %
R. David Murraya90032a2010-10-17 22:46:45 +0000631 ns_uri))
632
Guido van Rossumd8faa362007-04-27 19:54:29 +0000633 def test_1463026_1(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200634 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000635 gen = XMLGenerator(result)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000636
Guido van Rossumd8faa362007-04-27 19:54:29 +0000637 gen.startDocument()
638 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
639 gen.endElementNS((None, 'a'), 'a')
640 gen.endDocument()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000641
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200642 self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>'))
Thomas Wouterscf297e42007-02-23 15:07:44 +0000643
R. David Murraya90032a2010-10-17 22:46:45 +0000644 def test_1463026_1_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200645 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000646 gen = XMLGenerator(result, short_empty_elements=True)
647
648 gen.startDocument()
649 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
650 gen.endElementNS((None, 'a'), 'a')
651 gen.endDocument()
652
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200653 self.assertEqual(result.getvalue(), self.xml('<a b="c"/>'))
R. David Murraya90032a2010-10-17 22:46:45 +0000654
Guido van Rossumd8faa362007-04-27 19:54:29 +0000655 def test_1463026_2(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200656 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000657 gen = XMLGenerator(result)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000658
Guido van Rossumd8faa362007-04-27 19:54:29 +0000659 gen.startDocument()
660 gen.startPrefixMapping(None, 'qux')
661 gen.startElementNS(('qux', 'a'), 'a', {})
662 gen.endElementNS(('qux', 'a'), 'a')
663 gen.endPrefixMapping(None)
664 gen.endDocument()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000665
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200666 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>'))
Thomas Wouterscf297e42007-02-23 15:07:44 +0000667
R. David Murraya90032a2010-10-17 22:46:45 +0000668 def test_1463026_2_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200669 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000670 gen = XMLGenerator(result, short_empty_elements=True)
671
672 gen.startDocument()
673 gen.startPrefixMapping(None, 'qux')
674 gen.startElementNS(('qux', 'a'), 'a', {})
675 gen.endElementNS(('qux', 'a'), 'a')
676 gen.endPrefixMapping(None)
677 gen.endDocument()
678
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200679 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>'))
R. David Murraya90032a2010-10-17 22:46:45 +0000680
Guido van Rossumd8faa362007-04-27 19:54:29 +0000681 def test_1463026_3(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200682 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000683 gen = XMLGenerator(result)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000684
Guido van Rossumd8faa362007-04-27 19:54:29 +0000685 gen.startDocument()
686 gen.startPrefixMapping('my', 'qux')
687 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
688 gen.endElementNS(('qux', 'a'), 'a')
689 gen.endPrefixMapping('my')
690 gen.endDocument()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000691
Ezio Melottib3aedd42010-11-20 19:04:17 +0000692 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200693 self.xml('<my:a xmlns:my="qux" b="c"></my:a>'))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000694
R. David Murraya90032a2010-10-17 22:46:45 +0000695 def test_1463026_3_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200696 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000697 gen = XMLGenerator(result, short_empty_elements=True)
698
699 gen.startDocument()
700 gen.startPrefixMapping('my', 'qux')
701 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
702 gen.endElementNS(('qux', 'a'), 'a')
703 gen.endPrefixMapping('my')
704 gen.endDocument()
705
Ezio Melottib3aedd42010-11-20 19:04:17 +0000706 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200707 self.xml('<my:a xmlns:my="qux" b="c"/>'))
R. David Murraya90032a2010-10-17 22:46:45 +0000708
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000709 def test_5027_1(self):
710 # The xml prefix (as in xml:lang below) is reserved and bound by
711 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
Andrew Svetlov737fb892012-12-18 21:14:22 +0200712 # a bug whereby a KeyError is raised because this namespace is missing
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000713 # from a dictionary.
714 #
715 # This test demonstrates the bug by parsing a document.
716 test_xml = StringIO(
717 '<?xml version="1.0"?>'
718 '<a:g1 xmlns:a="http://example.com/ns">'
719 '<a:g2 xml:lang="en">Hello</a:g2>'
720 '</a:g1>')
721
722 parser = make_parser()
723 parser.setFeature(feature_namespaces, True)
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200724 result = self.ioclass()
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000725 gen = XMLGenerator(result)
726 parser.setContentHandler(gen)
727 parser.parse(test_xml)
728
Ezio Melottib3aedd42010-11-20 19:04:17 +0000729 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200730 self.xml(
Ezio Melottib3aedd42010-11-20 19:04:17 +0000731 '<a:g1 xmlns:a="http://example.com/ns">'
732 '<a:g2 xml:lang="en">Hello</a:g2>'
733 '</a:g1>'))
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000734
735 def test_5027_2(self):
736 # The xml prefix (as in xml:lang below) is reserved and bound by
737 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had
Andrew Svetlov737fb892012-12-18 21:14:22 +0200738 # a bug whereby a KeyError is raised because this namespace is missing
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000739 # from a dictionary.
740 #
741 # This test demonstrates the bug by direct manipulation of the
742 # XMLGenerator.
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200743 result = self.ioclass()
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000744 gen = XMLGenerator(result)
745
746 gen.startDocument()
747 gen.startPrefixMapping('a', 'http://example.com/ns')
748 gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {})
749 lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
750 gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr)
751 gen.characters('Hello')
752 gen.endElementNS(('http://example.com/ns', 'g2'), 'g2')
753 gen.endElementNS(('http://example.com/ns', 'g1'), 'g1')
754 gen.endPrefixMapping('a')
755 gen.endDocument()
756
Ezio Melottib3aedd42010-11-20 19:04:17 +0000757 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200758 self.xml(
Ezio Melottib3aedd42010-11-20 19:04:17 +0000759 '<a:g1 xmlns:a="http://example.com/ns">'
760 '<a:g2 xml:lang="en">Hello</a:g2>'
761 '</a:g1>'))
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000762
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200763 def test_no_close_file(self):
764 result = self.ioclass()
765 def func(out):
766 gen = XMLGenerator(out)
767 gen.startDocument()
768 gen.startElement("doc", {})
769 func(result)
770 self.assertFalse(result.closed)
771
Serhiy Storchakaa5f13d22013-02-25 13:46:10 +0200772 def test_xmlgen_fragment(self):
773 result = self.ioclass()
774 gen = XMLGenerator(result)
775
776 # Don't call gen.startDocument()
777 gen.startElement("foo", {"a": "1.0"})
778 gen.characters("Hello")
779 gen.endElement("foo")
780 gen.startElement("bar", {"b": "2.0"})
781 gen.endElement("bar")
782 # Don't call gen.endDocument()
783
784 self.assertEqual(result.getvalue(),
785 self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):])
786
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with a text (str) output stream.
    ioclass = StringIO

    def xml(self, doc, encoding='iso-8859-1'):
        # Expected output as str: XML declaration naming *encoding*,
        # a newline, then *doc*.
        return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)

    # Rebinding the name to None hides any test of this name inherited
    # from the base class (presumably not meaningful for a str stream,
    # which performs no encoding -- confirm against XmlgenTest).
    test_xmlgen_unencodable = None
794
class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with a binary output stream.
    ioclass = BytesIO

    def xml(self, doc, encoding='iso-8859-1'):
        # Expected output as bytes: declaration plus *doc*, encoded with
        # *encoding*; unencodable characters become character references.
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode(encoding, 'xmlcharrefreplace')
801
class WriterXmlgenTest(BytesXmlgenTest):
    # Runs the byte-oriented cases against a minimal file-like object
    # rather than a real io stream.

    class ioclass(list):
        # A list of written chunks exposing only the handful of file
        # methods defined below.
        write = list.append
        closed = False

        def seekable(self):
            return True

        def tell(self):
            # return 0 at start and not 0 after start
            return len(self)

        def getvalue(self):
            # Concatenate every chunk passed to write().
            return b''.join(self)
816
class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with a codecs StreamWriter as output.

    def ioclass(self):
        # Wrap an in-memory byte buffer in an ASCII stream writer and
        # expose the buffer's getvalue() on the writer so the shared test
        # cases can read back what was written.
        raw = BytesIO()
        writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace')
        writer.getvalue = raw.getvalue
        return writer

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration still names *encoding*, but the bytes are ASCII
        # with character references, matching the writer built above.
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
827
class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the XmlgenTest cases with a file opened through codecs.open()
    # as output.
    fname = os_helper.TESTFN + '-codecs'

    def ioclass(self):
        # Open a real file for ASCII output and attach a getvalue() that
        # reads the file back, so the shared test cases can inspect it.
        writer = codecs.open(self.fname, 'w', encoding='ascii',
                             errors='xmlcharrefreplace', buffering=0)

        def cleanup():
            writer.close()
            os_helper.unlink(self.fname)
        self.addCleanup(cleanup)

        def getvalue():
            # Windows will not let us reopen without first closing
            writer.close()
            with open(writer.name, 'rb') as f:
                return f.read()
        writer.getvalue = getvalue
        return writer

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration still names *encoding*, but the bytes are ASCII
        # with character references, matching the writer built above.
        return ('<?xml version="1.0" encoding="%s"?>\n%s' %
                (encoding, doc)).encode('ascii', 'xmlcharrefreplace')
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200849
# XML declaration that the tests below expect at the start of XMLGenerator
# output written to a byte stream.
start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
851
Fred Drake004d5e62000-10-23 17:22:08 +0000852
class XMLFilterBaseTest(unittest.TestCase):
    def test_filter_basic(self):
        # Content events sent to an XMLFilterBase must reach the content
        # handler set on it; here that handler is an XMLGenerator, so the
        # events show up as serialized XML.
        result = BytesIO()
        gen = XMLGenerator(result)
        filter = XMLFilterBase()
        filter.setContentHandler(gen)

        filter.startDocument()
        filter.startElement("doc", {})
        filter.characters("content")
        filter.ignorableWhitespace(" ")
        filter.endElement("doc")
        filter.endDocument()

        self.assertEqual(result.getvalue(), start + b"<doc>content </doc>")
Lars Gustäbel96753b32000-09-24 12:24:24 +0000868
# ===========================================================================
#
#   expatreader tests
#
# ===========================================================================
874
# Reference output (raw bytes) that the expatreader tests compare against.
with open(TEST_XMLFILE_OUT, 'rb') as f:
    xml_test_out = f.read()
Lars Gustäbelb7536d52000-09-24 18:53:56 +0000877
class ExpatReaderTest(XmlTestBase):
    # Tests for the expat-based SAX reader returned by create_parser().
    # Most cases feed TEST_XMLFILE through the parser into an XMLGenerator
    # and compare the serialized result with the xml_test_out reference.

    # ===== XMLReader support

    def test_expat_binary_file(self):
        # parse() accepts an open binary file object.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_text_file(self):
        # parse() accepts an open text file object.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_binary_file_nonascii(self):
        # Binary file object whose name contains non-ASCII characters.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        # Use a with-block, like the sibling tests, so the file is closed
        # even if parse() raises.
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_bytes_name(self):
        # Binary file object that was opened with a bytes path.
        fname = os.fsencode(TEST_XMLFILE)
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_int_name(self):
        # Binary file object that was opened from a file descriptor.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            # closefd=False: the descriptor stays owned by the outer file.
            with open(f.fileno(), 'rb', closefd=False) as f2:
                parser.parse(f2)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== DTDHandler support

    class TestDTDHandler:
        # Records every notation / unparsed-entity declaration it receives.

        def __init__(self):
            self._notations = []
            self._entities = []

        def notationDecl(self, name, publicId, systemId):
            self._notations.append((name, publicId, systemId))

        def unparsedEntityDecl(self, name, publicId, systemId, ndata):
            self._entities.append((name, publicId, systemId, ndata))

    class TestEntityRecorder:
        # Entity resolver that logs each request and answers with an
        # InputSource carrying the same public and system ids.

        def __init__(self):
            self.entities = []

        def resolveEntity(self, publicId, systemId):
            self.entities.append((publicId, systemId))
            source = InputSource()
            source.setPublicId(publicId)
            source.setSystemId(systemId)
            return source

    def test_expat_dtdhandler(self):
        # Declarations in the internal DTD subset reach the DTD handler.
        parser = create_parser()
        handler = self.TestDTDHandler()
        parser.setDTDHandler(handler)

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
        parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
        parser.feed(']>\n')
        parser.feed('<doc></doc>')
        parser.close()

        self.assertEqual(handler._notations,
            [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
        self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])

    def test_expat_external_dtd_enabled(self):
        # With feature_external_ges switched on, the external DTD is
        # requested from the resolver and opening its URL fails.
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        with self.assertRaises(URLError):
            parser.feed(
                '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
            )
        self.assertEqual(
            resolver.entities, [(None, 'unsupported://non-existing')]
        )

    def test_expat_external_dtd_default(self):
        # With the feature left at its default the resolver is never asked.
        parser = create_parser()
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        parser.feed(
            '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
        )
        parser.feed('<doc />')
        parser.close()
        self.assertEqual(resolver.entities, [])

    # ===== EntityResolver support

    class TestEntityResolver:
        # Resolves every entity to the fixed document <entity/>.

        def resolveEntity(self, publicId, systemId):
            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(b"<entity/>"))
            return inpsrc

    def test_expat_entityresolver_enabled(self):
        # With feature_external_ges on, &test; is replaced by the content
        # supplied by the resolver.
        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc><entity></entity></doc>")

    def test_expat_entityresolver_default(self):
        # The feature is off by default, so &test; contributes nothing.
        parser = create_parser()
        self.assertEqual(parser.getFeature(feature_external_ges), False)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc></doc>")

    # ===== Attributes support

    class AttrGatherer(ContentHandler):
        # Keeps the attributes object of the most recent start-element
        # event, for both the plain and the namespace-aware callback.

        def startElement(self, name, attrs):
            self._attrs = attrs

        def startElementNS(self, name, qname, attrs):
            self._attrs = attrs

    def test_expat_attrs_empty(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_attrs(gather._attrs)

    def test_expat_attrs_wattr(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc attr='val'/>")
        parser.close()

        self.verify_attrs_wattr(gather._attrs)

    def test_expat_nsattrs_empty(self):
        # The argument 1 turns on namespace handling in create_parser().
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_nsattrs(gather._attrs)

    def test_expat_nsattrs_wattr(self):
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
        parser.close()

        attrs = gather._attrs

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
        # Either no qualified names or the single expected one is accepted.
        self.assertIn(attrs.getQNames(), ([], ["ns:attr"]))
        self.assertEqual(len(attrs), 1)
        self.assertIn((ns_uri, "attr"), attrs)
        self.assertEqual(attrs.get((ns_uri, "attr")), "val")
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
        self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
        self.assertEqual(attrs[(ns_uri, "attr")], "val")

    # ===== InputSource support

    def test_expat_inpsource_filename(self):
        # parse() accepts a plain file name.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_sysid(self):
        # parse() accepts an InputSource that only carries a system id.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(TEST_XMLFILE))

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_inpsource_sysid_nonascii(self):
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(fname))

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_byte_stream(self):
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rb') as f:
            inpsrc.setByteStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_character_stream(self):
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            inpsrc.setCharacterStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== IncrementalParser support

    def test_expat_incremental(self):
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc></doc>")

    def test_expat_incremental_reset(self):
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        # Start a document and abandon it half way through ...
        parser.feed("<doc>")
        parser.feed("text")

        # ... then reset() and parse a complete document into a fresh
        # stream; only the second document may show up in the output.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser.setContentHandler(xmlgen)
        parser.reset()

        parser.feed("<doc>")
        parser.feed("text")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

    # ===== Locator support

    def test_expat_locator_noinfo(self):
        # Data supplied through feed() has no ids attached.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(parser.getSystemId(), None)
        self.assertEqual(parser.getPublicId(), None)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_expat_locator_withinfo(self):
        # Parsing by file name makes that name the reported system id.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
        self.assertEqual(parser.getPublicId(), None)

    @requires_nonascii_filenames
    def test_expat_locator_withinfo_nonascii(self):
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(fname)

        self.assertEqual(parser.getSystemId(), fname)
        self.assertEqual(parser.getPublicId(), None)
1256
Martin v. Löwis80670bc2000-10-06 21:13:23 +00001257
# ===========================================================================
#
#   error reporting
#
# ===========================================================================
1263
class ErrorReportingTest(unittest.TestCase):
    # Checks the location information attached to SAX parse errors.

    def test_expat_inpsource_location(self):
        # The system id set on the InputSource must be reported by the
        # exception raised for an ill-formed document.
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        source = InputSource()
        source.setByteStream(BytesIO(b"<foo bar foobar>"))   #ill-formed
        name = "a file name"
        source.setSystemId(name)
        with self.assertRaises(SAXException) as cm:
            parser.parse(source)
        self.assertEqual(cm.exception.getSystemId(), name)

    def test_expat_incomplete(self):
        # An unterminated document must fail, and the parser's locator
        # must then report the position asserted below.
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
        self.assertEqual(parser.getColumnNumber(), 5)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_sax_parse_exception_str(self):
        # pass various values from a locator to the SAXParseException to
        # make sure that the __str__() doesn't fall apart when None is
        # passed instead of an integer line and column number
        #
        # use "normal" values for the locator:
        str(SAXParseException("message", None,
                              self.DummyLocator(1, 1)))
        # use None for the line number:
        str(SAXParseException("message", None,
                              self.DummyLocator(None, 1)))
        # use None for the column number:
        str(SAXParseException("message", None,
                              self.DummyLocator(1, None)))
        # use None for both:
        str(SAXParseException("message", None,
                              self.DummyLocator(None, None)))

    class DummyLocator:
        # Locator stand-in: fixed public/system ids, and whatever line and
        # column values (None included) were given to the constructor.

        def __init__(self, lineno, colno):
            self._lineno = lineno
            self._colno = colno

        def getPublicId(self):
            return "pubid"

        def getSystemId(self):
            return "sysid"

        def getLineNumber(self):
            return self._lineno

        def getColumnNumber(self):
            return self._colno
Martin v. Löwis80670bc2000-10-06 21:13:23 +00001319
# ===========================================================================
#
#   xmlreader tests
#
# ===========================================================================
1325
class XmlReaderTest(XmlTestBase):
    # Exercises the attribute containers from xml.sax.xmlreader:
    # AttributesImpl (plain names) and AttributesNSImpl (namespace-aware
    # names keyed by (uri, localname) tuples).

    # ===== AttributesImpl
    def test_attrs_empty(self):
        attrs = AttributesImpl({})
        self.verify_empty_attrs(attrs)

    def test_attrs_wattr(self):
        attrs = AttributesImpl({"attr": "val"})
        self.verify_attrs_wattr(attrs)

    # ===== AttributesNSImpl
    def test_nsattrs_empty(self):
        attrs = AttributesNSImpl({}, {})
        self.verify_empty_nsattrs(attrs)

    def test_nsattrs_wattr(self):
        # A single namespaced attribute, addressed both by its
        # (uri, localname) name and by its qualified name.
        name = (ns_uri, "attr")
        qname = "ns:attr"
        attrs = AttributesNSImpl({name: "val"}, {name: qname})

        # SAX-style accessors for size and names
        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [name])
        self.assertEqual(attrs.getQNames(), [qname])

        # mapping-style access
        self.assertEqual(len(attrs), 1)
        self.assertIn(name, attrs)
        self.assertEqual(list(attrs.keys()), [name])
        self.assertEqual(attrs.get(name), "val")
        self.assertEqual(attrs.get(name, 25), "val")
        self.assertEqual(list(attrs.items()), [(name, "val")])
        self.assertEqual(list(attrs.values()), ["val"])

        # lookups by name and by qualified name
        self.assertEqual(attrs.getValue(name), "val")
        self.assertEqual(attrs.getValueByQName(qname), "val")
        self.assertEqual(attrs.getNameByQName(qname), name)
        self.assertEqual(attrs[name], "val")
        self.assertEqual(attrs.getQNameByName(name), qname)
Fred Drake004d5e62000-10-23 17:22:08 +00001357
Lars Gustäbelab647872000-09-24 18:40:52 +00001358
def test_main():
    """Run all test case classes of this module via run_unittest()."""
    test_classes = (
        MakeParserTest,
        ParseTest,
        SaxutilsTest,
        PrepareInputSourceTest,
        StringXmlgenTest,
        BytesXmlgenTest,
        WriterXmlgenTest,
        StreamWriterXmlgenTest,
        StreamReaderWriterXmlgenTest,
        ExpatReaderTest,
        ErrorReportingTest,
        XmlReaderTest,
    )
    run_unittest(*test_classes)
Lars Gustäbelb7536d52000-09-24 18:53:56 +00001372
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()