blob: eda4e6a46df437a24f21ab96a88cb7c32c7faf7d [file] [log] [blame]
# regression test for SAX 2.0
# $Id$
3
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004from xml.sax import make_parser, ContentHandler, \
5 SAXException, SAXReaderNotAvailable, SAXParseException
R David Murraya846f5a2013-03-18 00:18:12 -04006import unittest
Victor Stinneref9c0e72017-05-05 09:46:47 +02007from unittest import mock
Martin v. Löwis962c9e72000-10-06 17:41:52 +00008try:
9 make_parser()
Martin v. Löwis80670bc2000-10-06 21:13:23 +000010except SAXReaderNotAvailable:
Martin v. Löwis962c9e72000-10-06 17:41:52 +000011 # don't try to test this module if we cannot create a parser
R David Murraya846f5a2013-03-18 00:18:12 -040012 raise unittest.SkipTest("no XML parsers available")
Thomas Wouters0e3f5912006-08-11 14:57:12 +000013from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
Serhiy Storchakaaa9563c2015-04-02 20:55:59 +030014 XMLFilterBase, prepare_input_source
Thomas Wouters0e3f5912006-08-11 14:57:12 +000015from xml.sax.expatreader import create_parser
Zackery Spytze28b8c92020-08-09 04:50:53 -060016from xml.sax.handler import (feature_namespaces, feature_external_ges,
17 LexicalHandler)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000018from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
Serhiy Storchaka88efc522013-02-10 14:29:52 +020019from io import BytesIO, StringIO
Georg Brandlc502df42013-05-12 11:41:12 +020020import codecs
Serhiy Storchaka1a4ed4c2013-02-02 12:17:05 +020021import os.path
Serhiy Storchakad5202392013-02-02 10:31:17 +020022import shutil
Hai Shi24bddc12020-05-28 22:24:39 +080023import sys
Christian Heimes17b1d5d2018-09-23 09:50:25 +020024from urllib.error import URLError
Victor Stinner7cb92042019-07-02 14:50:19 +020025import urllib.request
Hai Shibb0424b2020-08-04 00:47:42 +080026from test.support import os_helper
Serhiy Storchakabedce352021-09-19 22:36:03 +030027from test.support import findfile
Hai Shibb0424b2020-08-04 00:47:42 +080028from test.support.os_helper import FakePath, TESTFN
29
Florent Xiclunaf15351d2010-03-13 23:24:31 +000030
# Locations of the sample document and of its expected regenerated output.
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
    # Skip the whole module when either path cannot be encoded as UTF-8.
    TEST_XMLFILE.encode("utf-8")
    TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")

if os.path.supports_unicode_filenames:
    supports_nonascii_filenames = True
else:
    try:
        os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
    except (UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        supports_nonascii_filenames = False
    else:
        supports_nonascii_filenames = True

# Decorator that skips a test when non-ASCII file names are unavailable.
requires_nonascii_filenames = unittest.skipUnless(
    supports_nonascii_filenames,
    'Requires non-ascii filenames support')

# Namespace URI shared by the namespace-aware tests.
ns_uri = "http://www.python.org/xml-ns/saxtest/"
Lars Gustäbel96753b32000-09-24 12:24:24 +000052
class XmlTestBase(unittest.TestCase):
    """Shared assertions for SAX attribute containers.

    The helpers exercise both the SAX accessors (getValue, getNames, ...)
    and the mapping-style interface (len, in, keys, get, items, values) of
    the object passed in.  The class defines no test methods of its own.
    """

    def verify_empty_attrs(self, attrs):
        """Assert that *attrs* behaves as an empty, non-namespace container."""
        # Every keyed accessor has to reject the unknown name "attr".
        failing_lookups = (
            attrs.getValue,
            attrs.getValueByQName,
            attrs.getNameByQName,
            attrs.getQNameByName,
            attrs.__getitem__,
        )
        for lookup in failing_lookups:
            with self.assertRaises(KeyError):
                lookup("attr")

        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn("attr", attrs)
        self.assertEqual(list(attrs.keys()), [])
        # get() returns None, or the supplied default, for a missing key.
        self.assertEqual(attrs.get("attrs"), None)
        self.assertEqual(attrs.get("attrs", 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_empty_nsattrs(self, attrs):
        """Assert that *attrs* behaves as an empty namespace-aware container."""
        ns_name = (ns_uri, "attr")
        qname = "ns:attr"
        # Name-keyed accessors take the (uri, localname) pair; the
        # QName-keyed ones take the prefixed string.
        failing_lookups = (
            (attrs.getValue, ns_name),
            (attrs.getValueByQName, qname),
            (attrs.getNameByQName, qname),
            (attrs.getQNameByName, ns_name),
            (attrs.__getitem__, ns_name),
        )
        for lookup, key in failing_lookups:
            with self.assertRaises(KeyError):
                lookup(key)

        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn(ns_name, attrs)
        self.assertEqual(list(attrs.keys()), [])
        self.assertEqual(attrs.get(ns_name), None)
        self.assertEqual(attrs.get(ns_name, 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_attrs_wattr(self, attrs):
        """Assert that *attrs* holds exactly one attribute, attr="val"."""
        name = "attr"
        value = "val"

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [name])
        self.assertEqual(attrs.getQNames(), [name])
        self.assertEqual(len(attrs), 1)
        self.assertIn(name, attrs)
        self.assertEqual(list(attrs.keys()), [name])
        # The default passed to get() must be ignored for a present key.
        self.assertEqual(attrs.get(name), value)
        self.assertEqual(attrs.get(name, 25), value)
        self.assertEqual(list(attrs.items()), [(name, value)])
        self.assertEqual(list(attrs.values()), [value])
        self.assertEqual(attrs.getValue(name), value)
        self.assertEqual(attrs.getValueByQName(name), value)
        self.assertEqual(attrs.getNameByQName(name), name)
        self.assertEqual(attrs[name], value)
        self.assertEqual(attrs.getQNameByName(name), name)
Lars Gustäbel96753b32000-09-24 12:24:24 +0000104
Serhiy Storchaka13e41c52015-04-02 23:05:57 +0300105
def xml_str(doc, encoding=None):
    """Return *doc*, preceded by an XML declaration if *encoding* is given.

    With ``encoding=None`` the document text is returned unchanged.
    """
    if encoding is not None:
        return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc)
    return doc
110
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* serialized to bytes in *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*, and ``None`` omits the declaration.  Characters
    that *encoding* cannot represent become character references.
    """
    # Ellipsis is the "not given" marker because None is a meaningful value.
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    return text.encode(encoding, 'xmlcharrefreplace')
115
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to TESTFN as text encoded in *encoding*.

    *decl_encoding* is the encoding named in the XML declaration; it
    defaults to *encoding*, and ``None`` omits the declaration.  Characters
    that *encoding* cannot represent are written as character references.
    """
    # Ellipsis is the "not given" marker because None is a meaningful value.
    declared = encoding if decl_encoding is ... else decl_encoding
    with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f:
        content = xml_str(doc, declared)
        f.write(content)
121
122
class ParseTest(unittest.TestCase):
    """Checks that xml.sax.parse() and parseString() accept varied inputs.

    Every check parses some serialization of ``data`` and verifies that
    XMLGenerator writes it back as a document declared as UTF-8.
    """

    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        # Remove the scratch file that several tests write.
        os_helper.unlink(TESTFN)

    def check_parse(self, f):
        """Parse source *f* and compare the regenerated XML with ``data``."""
        from xml.sax import parse
        result = StringIO()
        parse(f, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def _check_parse_bytes_sources(self, encoding, decl_encoding=...):
        """Parse ``data`` as bytes from a stream, a file name and a binary file."""
        self.check_parse(BytesIO(xml_bytes(self.data, encoding, decl_encoding)))
        make_xml_file(self.data, encoding, decl_encoding)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.check_parse(f)

    def test_parse_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            # subTest makes a failure report name the offending encoding.
            with self.subTest(encoding=encoding):
                # Text input with a declaration.
                self.check_parse(StringIO(xml_str(self.data, encoding)))
                make_xml_file(self.data, encoding)
                with open(TESTFN, 'r', encoding=encoding) as f:
                    self.check_parse(f)
                # Text input without a declaration.
                self.check_parse(StringIO(self.data))
                make_xml_file(self.data, encoding, None)
                with open(TESTFN, 'r', encoding=encoding) as f:
                    self.check_parse(f)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self._check_parse_bytes_sources(encoding)
                self._check_parse_bytes_sources(encoding, None)
        # accept UTF-8 with BOM
        self._check_parse_bytes_sources('utf-8-sig', 'utf-8')
        self._check_parse_bytes_sources('utf-8-sig', None)
        # accept data with declared encoding
        self._check_parse_bytes_sources('iso-8859-1')
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with self.assertRaises(SAXException):
            self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as f:
            with self.assertRaises(SAXException):
                self.check_parse(f)

    def test_parse_path_object(self):
        make_xml_file(self.data, 'utf-8', None)
        self.check_parse(FakePath(TESTFN))

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as f:
            source = InputSource()
            source.setByteStream(f)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def test_parse_close_source(self):
        builtin_open = open
        fileobj = None

        def mock_open(*args):
            # Record the file object handed out so it can be inspected
            # after parsing has failed.
            nonlocal fileobj
            fileobj = builtin_open(*args)
            return fileobj

        with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
            make_xml_file(self.data, 'iso-8859-1', None)
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            # Fail with a clear message, not an AttributeError on None,
            # if the patched open() was never reached.
            self.assertIsNotNone(fileobj, "patched open() was never called")
            self.assertTrue(fileobj.closed)

    def check_parseString(self, s):
        """Parse string *s* and compare the regenerated XML with ``data``."""
        from xml.sax import parseString
        result = StringIO()
        parseString(s, XMLGenerator(result, 'utf-8'))
        self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_text(self):
        encodings = ('us-ascii', 'iso-8859-1', 'utf-8',
                     'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self.check_parseString(xml_str(self.data, encoding))
                self.check_parseString(self.data)

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be')
        for encoding in encodings:
            with self.subTest(encoding=encoding):
                self.check_parseString(xml_bytes(self.data, encoding))
                self.check_parseString(xml_bytes(self.data, encoding, None))
        # accept UTF-8 with BOM
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))
        self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
246
class MakeParserTest(unittest.TestCase):
    """Checks that make_parser() copes with repeated calls and any iterable."""

    def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        for _ in range(6):
            # The import is repeated on purpose, as part of the scenario.
            from xml.sax import make_parser
            p = make_parser()

    def test_make_parser3(self):
        # Testing that make_parser can handle different types of
        # iterables.
        candidates = (
            ['module'],
            ('module', ),
            {'module'},
            frozenset({'module'}),
            {'module': None},
            iter(['module']),
        )
        for parser_list in candidates:
            make_parser(parser_list)

    def test_make_parser4(self):
        # Testing that make_parser can handle empty iterables.
        candidates = (
            [],
            tuple(),
            set(),
            frozenset(),
            {},
            iter([]),
        )
        for parser_list in candidates:
            make_parser(parser_list)

    def test_make_parser5(self):
        # Testing that make_parser can handle iterables with more than
        # one item.
        candidates = (
            ['module1', 'module2'],
            ('module1', 'module2'),
            {'module1', 'module2'},
            frozenset({'module1', 'module2'}),
            {'module1': None, 'module2': None},
            iter(['module1', 'module2']),
        )
        for parser_list in candidates:
            make_parser(parser_list)
Tim Petersd2bf3b72001-01-18 02:22:22 +0000293
# ===========================================================================
#
#   saxutils tests
#
# ===========================================================================
299
class SaxutilsTest(unittest.TestCase):
    """Checks escape(), unescape(), quoteattr() and the make_parser() fallback."""

    # ===== escape
    def test_escape_basic(self):
        escaped = escape("Donald Duck & Co")
        self.assertEqual(escaped, "Donald Duck &amp; Co")

    def test_escape_all(self):
        escaped = escape("<Donald Duck & Co>")
        self.assertEqual(escaped, "&lt;Donald Duck &amp; Co&gt;")

    def test_escape_extra(self):
        # Extra replacements are supplied through the entities mapping.
        escaped = escape("Hei på deg", {"å" : "&aring;"})
        self.assertEqual(escaped, "Hei p&aring; deg")

    # ===== unescape
    def test_unescape_basic(self):
        plain = unescape("Donald Duck &amp; Co")
        self.assertEqual(plain, "Donald Duck & Co")

    def test_unescape_all(self):
        plain = unescape("&lt;Donald Duck &amp; Co&gt;")
        self.assertEqual(plain, "<Donald Duck & Co>")

    def test_unescape_extra(self):
        plain = unescape("Hei på deg", {"å" : "&aring;"})
        self.assertEqual(plain, "Hei p&aring; deg")

    def test_unescape_amp_extra(self):
        # The "&foo;" produced by unescaping "&amp;" must not then be
        # replaced through the extra mapping.
        plain = unescape("&amp;foo;", {"&foo;": "splat"})
        self.assertEqual(plain, "&foo;")

    # ===== quoteattr
    def test_quoteattr_basic(self):
        quoted = quoteattr("Donald Duck & Co")
        self.assertEqual(quoted, '"Donald Duck &amp; Co"')

    def test_single_quoteattr(self):
        # A value containing double quotes is wrapped in single quotes.
        quoted = quoteattr('Includes "double" quotes')
        self.assertEqual(quoted, '\'Includes "double" quotes\'')

    def test_double_quoteattr(self):
        # A value containing single quotes is wrapped in double quotes.
        quoted = quoteattr("Includes 'single' quotes")
        self.assertEqual(quoted, "\"Includes 'single' quotes\"")

    def test_single_double_quoteattr(self):
        # With both kinds present the double quotes are escaped.
        quoted = quoteattr("Includes 'single' and \"double\" quotes")
        self.assertEqual(quoted,
                         "\"Includes 'single' and &quot;double&quot; quotes\"")

    # ===== make_parser
    def test_make_parser(self):
        # Creating a parser should succeed - it should fall back
        # to the expatreader
        unknown_drivers = ['xml.parsers.no_such_parser']
        p = make_parser(unknown_drivers)
Martin v. Löwis962c9e72000-10-06 17:41:52 +0000350
351
class PrepareInputSourceTest(unittest.TestCase):
    """Checks which stream prepare_input_source() selects for each source kind."""

    def setUp(self):
        # A scratch file stands in for the resource named by a system ID.
        self.file = os_helper.TESTFN
        with open(self.file, "w") as tmp:
            tmp.write("This was read from a file.")

    def tearDown(self):
        os_helper.unlink(self.file)

    def make_byte_stream(self):
        """Return a fresh in-memory binary stream with known content."""
        return BytesIO(b"This is a byte stream.")

    def make_character_stream(self):
        """Return a fresh in-memory text stream with known content."""
        return StringIO("This is a character stream.")

    def checkContent(self, stream, content):
        """Assert that *stream* exists and yields *content*, then close it."""
        self.assertIsNotNone(stream)
        try:
            self.assertEqual(stream.read(), content)
        finally:
            # Close even when the comparison fails, so a failing test does
            # not leave an open file behind.
            stream.close()

    def test_character_stream(self):
        # If the source is an InputSource with a character stream, use it.
        src = InputSource(self.file)
        src.setCharacterStream(self.make_character_stream())
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getByteStream())
        self.checkContent(prep.getCharacterStream(),
                          "This is a character stream.")

    def test_byte_stream(self):
        # If the source is an InputSource that does not have a character
        # stream but does have a byte stream, use the byte stream.
        src = InputSource(self.file)
        src.setByteStream(self.make_byte_stream())
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")

    def test_system_id(self):
        # If the source is an InputSource that has neither a character
        # stream nor a byte stream, open the system ID.
        src = InputSource(self.file)
        prep = prepare_input_source(src)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_string(self):
        # If the source is a string, use it as a system ID and open it.
        prep = prepare_input_source(self.file)
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_path_objects(self):
        # If the source is a Path object, use it as a system ID and open it.
        prep = prepare_input_source(FakePath(self.file))
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This was read from a file.")

    def test_binary_file(self):
        # If the source is a binary file-like object, use it as a byte
        # stream.
        prep = prepare_input_source(self.make_byte_stream())
        self.assertIsNone(prep.getCharacterStream())
        self.checkContent(prep.getByteStream(),
                          b"This is a byte stream.")

    def test_text_file(self):
        # If the source is a text file-like object, use it as a character
        # stream.
        prep = prepare_input_source(self.make_character_stream())
        self.assertIsNone(prep.getByteStream())
        self.checkContent(prep.getCharacterStream(),
                          "This is a character stream.")
431
Serhiy Storchakaaa9563c2015-04-02 20:55:59 +0300432
# ===== XMLGenerator
434
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200435class XmlgenTest:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000436 def test_xmlgen_basic(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200437 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000438 gen = XMLGenerator(result)
439 gen.startDocument()
440 gen.startElement("doc", {})
441 gen.endElement("doc")
442 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000443
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200444 self.assertEqual(result.getvalue(), self.xml("<doc></doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000445
R. David Murraya90032a2010-10-17 22:46:45 +0000446 def test_xmlgen_basic_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200447 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000448 gen = XMLGenerator(result, short_empty_elements=True)
449 gen.startDocument()
450 gen.startElement("doc", {})
451 gen.endElement("doc")
452 gen.endDocument()
453
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200454 self.assertEqual(result.getvalue(), self.xml("<doc/>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000455
Guido van Rossumd8faa362007-04-27 19:54:29 +0000456 def test_xmlgen_content(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200457 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000458 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000459
Guido van Rossumd8faa362007-04-27 19:54:29 +0000460 gen.startDocument()
461 gen.startElement("doc", {})
462 gen.characters("huhei")
463 gen.endElement("doc")
464 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000465
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200466 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000467
R. David Murraya90032a2010-10-17 22:46:45 +0000468 def test_xmlgen_content_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200469 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000470 gen = XMLGenerator(result, short_empty_elements=True)
471
472 gen.startDocument()
473 gen.startElement("doc", {})
474 gen.characters("huhei")
475 gen.endElement("doc")
476 gen.endDocument()
477
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200478 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000479
Guido van Rossumd8faa362007-04-27 19:54:29 +0000480 def test_xmlgen_pi(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200481 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000482 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000483
Guido van Rossumd8faa362007-04-27 19:54:29 +0000484 gen.startDocument()
485 gen.processingInstruction("test", "data")
486 gen.startElement("doc", {})
487 gen.endElement("doc")
488 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000489
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200490 self.assertEqual(result.getvalue(),
491 self.xml("<?test data?><doc></doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000492
Guido van Rossumd8faa362007-04-27 19:54:29 +0000493 def test_xmlgen_content_escape(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200494 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000495 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000496
Guido van Rossumd8faa362007-04-27 19:54:29 +0000497 gen.startDocument()
498 gen.startElement("doc", {})
499 gen.characters("<huhei&")
500 gen.endElement("doc")
501 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000502
Ezio Melottib3aedd42010-11-20 19:04:17 +0000503 self.assertEqual(result.getvalue(),
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200504 self.xml("<doc>&lt;huhei&amp;</doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000505
Guido van Rossumd8faa362007-04-27 19:54:29 +0000506 def test_xmlgen_attr_escape(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200507 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000508 gen = XMLGenerator(result)
Fred Drakec9fadf92001-08-07 19:17:06 +0000509
Guido van Rossumd8faa362007-04-27 19:54:29 +0000510 gen.startDocument()
511 gen.startElement("doc", {"a": '"'})
512 gen.startElement("e", {"a": "'"})
513 gen.endElement("e")
514 gen.startElement("e", {"a": "'\""})
515 gen.endElement("e")
516 gen.startElement("e", {"a": "\n\r\t"})
517 gen.endElement("e")
518 gen.endElement("doc")
519 gen.endDocument()
Fred Drakec9fadf92001-08-07 19:17:06 +0000520
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200521 self.assertEqual(result.getvalue(), self.xml(
522 "<doc a='\"'><e a=\"'\"></e>"
523 "<e a=\"'&quot;\"></e>"
524 "<e a=\"&#10;&#13;&#9;\"></e></doc>"))
525
526 def test_xmlgen_encoding(self):
527 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
528 'utf-16', 'utf-16be', 'utf-16le',
529 'utf-32', 'utf-32be', 'utf-32le')
530 for encoding in encodings:
531 result = self.ioclass()
532 gen = XMLGenerator(result, encoding=encoding)
533
534 gen.startDocument()
535 gen.startElement("doc", {"a": '\u20ac'})
536 gen.characters("\u20ac")
537 gen.endElement("doc")
538 gen.endDocument()
539
540 self.assertEqual(result.getvalue(),
541 self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding))
542
543 def test_xmlgen_unencodable(self):
544 result = self.ioclass()
545 gen = XMLGenerator(result, encoding='ascii')
546
547 gen.startDocument()
548 gen.startElement("doc", {"a": '\u20ac'})
549 gen.characters("\u20ac")
550 gen.endElement("doc")
551 gen.endDocument()
552
553 self.assertEqual(result.getvalue(),
554 self.xml('<doc a="&#8364;">&#8364;</doc>', encoding='ascii'))
Fred Drakec9fadf92001-08-07 19:17:06 +0000555
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556 def test_xmlgen_ignorable(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200557 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000558 gen = XMLGenerator(result)
Fred Drake004d5e62000-10-23 17:22:08 +0000559
Guido van Rossumd8faa362007-04-27 19:54:29 +0000560 gen.startDocument()
561 gen.startElement("doc", {})
562 gen.ignorableWhitespace(" ")
563 gen.endElement("doc")
564 gen.endDocument()
Lars Gustäbel96753b32000-09-24 12:24:24 +0000565
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200566 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000567
R. David Murraya90032a2010-10-17 22:46:45 +0000568 def test_xmlgen_ignorable_empty(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200569 result = self.ioclass()
R. David Murraya90032a2010-10-17 22:46:45 +0000570 gen = XMLGenerator(result, short_empty_elements=True)
571
572 gen.startDocument()
573 gen.startElement("doc", {})
574 gen.ignorableWhitespace(" ")
575 gen.endElement("doc")
576 gen.endDocument()
577
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200578 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>"))
R. David Murraya90032a2010-10-17 22:46:45 +0000579
Serhiy Storchaka3eab6b32013-05-12 17:31:16 +0300580 def test_xmlgen_encoding_bytes(self):
581 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig',
582 'utf-16', 'utf-16be', 'utf-16le',
583 'utf-32', 'utf-32be', 'utf-32le')
584 for encoding in encodings:
585 result = self.ioclass()
586 gen = XMLGenerator(result, encoding=encoding)
587
588 gen.startDocument()
589 gen.startElement("doc", {"a": '\u20ac'})
590 gen.characters("\u20ac".encode(encoding))
591 gen.ignorableWhitespace(" ".encode(encoding))
592 gen.endElement("doc")
593 gen.endDocument()
594
595 self.assertEqual(result.getvalue(),
596 self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding))
597
def test_xmlgen_ns(self):
    # A prefix mapping is declared on the element that follows it; an
    # element with no namespace and no qname is written by local name.
    sink = self.ioclass()
    writer = XMLGenerator(sink)
    root = (ns_uri, "doc")
    child = (None, "udoc")

    writer.startDocument()
    writer.startPrefixMapping("ns1", ns_uri)
    writer.startElementNS(root, "ns1:doc", {})
    # add an unqualified name
    writer.startElementNS(child, None, {})
    writer.endElementNS(child, None)
    writer.endElementNS(root, "ns1:doc")
    writer.endPrefixMapping("ns1")
    writer.endDocument()

    produced = sink.getvalue()
    self.assertEqual(produced, self.xml(
        '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' %
        ns_uri))
Lars Gustäbel96753b32000-09-24 12:24:24 +0000615
def test_xmlgen_ns_empty(self):
    # Same document as test_xmlgen_ns, but the childless inner element
    # is expected in the short empty form.
    sink = self.ioclass()
    writer = XMLGenerator(sink, short_empty_elements=True)
    root = (ns_uri, "doc")
    child = (None, "udoc")

    writer.startDocument()
    writer.startPrefixMapping("ns1", ns_uri)
    writer.startElementNS(root, "ns1:doc", {})
    # add an unqualified name
    writer.startElementNS(child, None, {})
    writer.endElementNS(child, None)
    writer.endElementNS(root, "ns1:doc")
    writer.endPrefixMapping("ns1")
    writer.endDocument()

    produced = sink.getvalue()
    self.assertEqual(produced, self.xml(
        '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' %
        ns_uri))
633
Guido van Rossumd8faa362007-04-27 19:54:29 +0000634 def test_1463026_1(self):
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200635 result = self.ioclass()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000636 gen = XMLGenerator(result)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000637
Guido van Rossumd8faa362007-04-27 19:54:29 +0000638 gen.startDocument()
639 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
640 gen.endElementNS((None, 'a'), 'a')
641 gen.endDocument()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000642
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200643 self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>'))
Thomas Wouterscf297e42007-02-23 15:07:44 +0000644
def test_1463026_1_empty(self):
    # Same document as test_1463026_1, but with short_empty_elements
    # enabled the empty element is expected in its self-closing form.
    out = self.ioclass()
    writer = XMLGenerator(out, short_empty_elements=True)
    elem_name = (None, 'a')
    elem_attrs = {(None, 'b'): 'c'}

    writer.startDocument()
    writer.startElementNS(elem_name, 'a', elem_attrs)
    writer.endElementNS(elem_name, 'a')
    writer.endDocument()

    produced = out.getvalue()
    self.assertEqual(produced, self.xml('<a b="c"/>'))
R. David Murraya90032a2010-10-17 22:46:45 +0000655
def test_1463026_2(self):
    # Mapping the None prefix to a URI is expected to produce a default
    # namespace declaration (xmlns="...") and an unprefixed element name.
    out = self.ioclass()
    writer = XMLGenerator(out)
    elem_name = ('qux', 'a')

    writer.startDocument()
    writer.startPrefixMapping(None, 'qux')
    writer.startElementNS(elem_name, 'a', {})
    writer.endElementNS(elem_name, 'a')
    writer.endPrefixMapping(None)
    writer.endDocument()

    produced = out.getvalue()
    self.assertEqual(produced, self.xml('<a xmlns="qux"></a>'))
Thomas Wouterscf297e42007-02-23 15:07:44 +0000668
def test_1463026_2_empty(self):
    # Default-namespace variant of the short-empty-element check: the
    # element is expected as a single self-closing tag carrying xmlns.
    out = self.ioclass()
    writer = XMLGenerator(out, short_empty_elements=True)
    elem_name = ('qux', 'a')

    writer.startDocument()
    writer.startPrefixMapping(None, 'qux')
    writer.startElementNS(elem_name, 'a', {})
    writer.endElementNS(elem_name, 'a')
    writer.endPrefixMapping(None)
    writer.endDocument()

    produced = out.getvalue()
    self.assertEqual(produced, self.xml('<a xmlns="qux"/>'))
R. David Murraya90032a2010-10-17 22:46:45 +0000681
def test_1463026_3(self):
    # The element's namespace is bound to the prefix 'my', so the output
    # is expected to use 'my:a' even though the qname passed in is 'a';
    # the no-namespace attribute stays unprefixed.
    out = self.ioclass()
    writer = XMLGenerator(out)
    elem_name = ('qux', 'a')
    elem_attrs = {(None, 'b'): 'c'}

    writer.startDocument()
    writer.startPrefixMapping('my', 'qux')
    writer.startElementNS(elem_name, 'a', elem_attrs)
    writer.endElementNS(elem_name, 'a')
    writer.endPrefixMapping('my')
    writer.endDocument()

    produced = out.getvalue()
    expected = self.xml('<my:a xmlns:my="qux" b="c"></my:a>')
    self.assertEqual(produced, expected)
Lars Gustäbel96753b32000-09-24 12:24:24 +0000695
def test_1463026_3_empty(self):
    # Prefixed-namespace variant of the short-empty-element check: the
    # element is expected as one self-closing 'my:a' tag.
    out = self.ioclass()
    writer = XMLGenerator(out, short_empty_elements=True)
    elem_name = ('qux', 'a')
    elem_attrs = {(None, 'b'): 'c'}

    writer.startDocument()
    writer.startPrefixMapping('my', 'qux')
    writer.startElementNS(elem_name, 'a', elem_attrs)
    writer.endElementNS(elem_name, 'a')
    writer.endPrefixMapping('my')
    writer.endDocument()

    produced = out.getvalue()
    expected = self.xml('<my:a xmlns:my="qux" b="c"/>')
    self.assertEqual(produced, expected)
R. David Murraya90032a2010-10-17 22:46:45 +0000709
def test_5027_1(self):
    # The xml prefix (as in xml:lang below) is reserved and bound by
    # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
    # a bug whereby a KeyError is raised because this namespace is missing
    # from a dictionary.
    #
    # This test demonstrates the bug by parsing a document: a
    # namespace-aware parser feeds its events straight into an
    # XMLGenerator, and the document must come back out unchanged.
    body = (
        '<a:g1 xmlns:a="http://example.com/ns">'
        '<a:g2 xml:lang="en">Hello</a:g2>'
        '</a:g1>')
    source = StringIO('<?xml version="1.0"?>' + body)

    reader = make_parser()
    reader.setFeature(feature_namespaces, True)
    out = self.ioclass()
    writer = XMLGenerator(out)
    reader.setContentHandler(writer)
    reader.parse(source)

    produced = out.getvalue()
    self.assertEqual(produced, self.xml(body))
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000735
def test_5027_2(self):
    # The xml prefix (as in xml:lang below) is reserved and bound by
    # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
    # a bug whereby a KeyError is raised because this namespace is missing
    # from a dictionary.
    #
    # This test demonstrates the bug by direct manipulation of the
    # XMLGenerator: only the 'a' prefix is mapped explicitly, yet the
    # attribute in the XML namespace must still be written as xml:lang.
    out = self.ioclass()
    writer = XMLGenerator(out)
    outer = ('http://example.com/ns', 'g1')
    inner = ('http://example.com/ns', 'g2')

    writer.startDocument()
    writer.startPrefixMapping('a', 'http://example.com/ns')
    writer.startElementNS(outer, 'g1', {})
    lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
    writer.startElementNS(inner, 'g2', lang_attr)
    writer.characters('Hello')
    writer.endElementNS(inner, 'g2')
    writer.endElementNS(outer, 'g1')
    writer.endPrefixMapping('a')
    writer.endDocument()

    produced = out.getvalue()
    expected = self.xml(
        '<a:g1 xmlns:a="http://example.com/ns">'
        '<a:g2 xml:lang="en">Hello</a:g2>'
        '</a:g1>')
    self.assertEqual(produced, expected)
Antoine Pitrou6b03ee62010-10-27 18:33:30 +0000763
def test_no_close_file(self):
    # The generator is created and used inside a helper so that the only
    # reference to it is dropped when the helper returns; the stream it
    # wrote to must still be open afterwards.
    out = self.ioclass()

    def write_and_discard(stream):
        writer = XMLGenerator(stream)
        writer.startDocument()
        writer.startElement("doc", {})

    write_and_discard(out)
    self.assertFalse(out.closed)
772
def test_xmlgen_fragment(self):
    # Elements written without startDocument()/endDocument() are expected
    # to appear without the XML declaration, so the declaration that
    # self.xml() prepends is sliced off the expected value.
    out = self.ioclass()
    writer = XMLGenerator(out)

    # Don't call gen.startDocument()
    writer.startElement("foo", {"a": "1.0"})
    writer.characters("Hello")
    writer.endElement("foo")
    writer.startElement("bar", {"b": "2.0"})
    writer.endElement("bar")
    # Don't call gen.endDocument()

    produced = out.getvalue()
    with_prolog = self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')
    prolog_length = len(self.xml(''))
    self.assertEqual(produced, with_prolog[prolog_length:])
787
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the shared XmlgenTest cases with the generator writing text
    # (str) into a StringIO.
    ioclass = StringIO

    # Setting the inherited name to None keeps this case from running for
    # text output.
    test_xmlgen_unencodable = None

    def xml(self, doc, encoding='iso-8859-1'):
        # Expected output as str: XML declaration, newline, then doc.
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        return template % (encoding, doc)
795
class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the shared XmlgenTest cases with the generator writing bytes
    # into a BytesIO.
    ioclass = BytesIO

    def xml(self, doc, encoding='iso-8859-1'):
        # Expected output as bytes: the declaration plus doc, encoded with
        # the declared encoding; unencodable characters become character
        # references.
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode(encoding, 'xmlcharrefreplace')
802
class WriterXmlgenTest(BytesXmlgenTest):
    # Re-runs the bytes-output cases against a minimal duck-typed writer
    # instead of a real io object: a list that collects each written chunk.
    class ioclass(list):
        # Each write() appends the chunk to the list.
        write = list.append
        # Read by test_no_close_file; this writer is never closed.
        closed = False

        def seekable(self):
            return True

        def tell(self):
            # return 0 at start and not 0 after start
            return len(self)

        def getvalue(self):
            # Concatenate all chunks written so far.
            return b''.join(self)
817
class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the shared XmlgenTest cases with the generator writing into a
    # codecs StreamWriter (ASCII, xmlcharrefreplace) layered on a BytesIO.

    def ioclass(self):
        # Build the writer and graft the underlying buffer's getvalue()
        # onto it, since the shared tests read results via getvalue().
        sink = BytesIO()
        writer_factory = codecs.getwriter('ascii')
        stream = writer_factory(sink, 'xmlcharrefreplace')
        stream.getvalue = sink.getvalue
        return stream

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration names `encoding`, but the bytes are always
        # ASCII-encoded to match the writer built by ioclass().
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode('ascii', 'xmlcharrefreplace')
828
class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    # Runs the shared XmlgenTest cases with the generator writing into the
    # object returned by codecs.open() on a real temporary file.
    fname = os_helper.TESTFN + '-codecs'

    def ioclass(self):
        stream = codecs.open(self.fname, 'w', encoding='ascii',
                             errors='xmlcharrefreplace', buffering=0)

        def close_and_remove():
            stream.close()
            os_helper.unlink(self.fname)

        # Registered right after opening so the file is removed even if
        # the test fails later.
        self.addCleanup(close_and_remove)

        def read_back():
            # Windows will not let us reopen without first closing
            stream.close()
            with open(stream.name, 'rb') as written:
                contents = written.read()
            return contents

        # The shared tests read results via getvalue().
        stream.getvalue = read_back
        return stream

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration names `encoding`, but the bytes are always
        # ASCII-encoded to match the file opened by ioclass().
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode('ascii', 'xmlcharrefreplace')
Serhiy Storchaka88efc522013-02-10 14:29:52 +0200850
# XML declaration that the tests below expect at the very beginning of
# XMLGenerator output written to a BytesIO; expected values are built as
# start + <document bytes>.
start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
852
Fred Drake004d5e62000-10-23 17:22:08 +0000853
class XMLFilterBaseTest(unittest.TestCase):
    def test_filter_basic(self):
        # Events sent to an XMLFilterBase must reach the content handler
        # installed on it: here an XMLGenerator, whose output is checked.
        out = BytesIO()
        writer = XMLGenerator(out)
        passthrough = XMLFilterBase()
        passthrough.setContentHandler(writer)

        passthrough.startDocument()
        passthrough.startElement("doc", {})
        passthrough.characters("content")
        passthrough.ignorableWhitespace(" ")
        passthrough.endElement("doc")
        passthrough.endDocument()

        produced = out.getvalue()
        self.assertEqual(produced, start + b"<doc>content </doc>")
Lars Gustäbel96753b32000-09-24 12:24:24 +0000869
870# ===========================================================================
871#
872# expatreader tests
873#
874# ===========================================================================
875
# Reference output, read once at import time as raw bytes; the
# ExpatReaderTest cases compare the XMLGenerator result for TEST_XMLFILE
# against it.
with open(TEST_XMLFILE_OUT, 'rb') as f:
    xml_test_out = f.read()
Lars Gustäbelb7536d52000-09-24 18:53:56 +0000878
class ExpatReaderTest(XmlTestBase):
    # Exercises the reader returned by xml.sax.expatreader.create_parser():
    # the kinds of input it accepts, DTD and entity-resolver callbacks,
    # attribute objects, incremental feeding and locator information.
    # Most cases pipe the parse events into an XMLGenerator and compare
    # the regenerated document with a known-good byte string.

    # ===== XMLReader support

    def test_expat_binary_file(self):
        # Parse from a file object opened in binary mode.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_text_file(self):
        # Parse from a file object opened in text mode (already decoded).
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_binary_file_nonascii(self):
        # Parse from a binary file object whose path is non-ASCII.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        # Use a context manager, like the sibling tests, so the handle is
        # closed by the test itself (not left to the parser) before the
        # unlink cleanup registered above runs.
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_bytes_name(self):
        # Parse from a binary file object that was opened with a bytes
        # path.
        fname = os.fsencode(TEST_XMLFILE)
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_int_name(self):
        # Parse from a binary file object that was opened from an integer
        # file descriptor.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            # closefd=False: the descriptor belongs to the outer file
            # object, which closes it.
            with open(f.fileno(), 'rb', closefd=False) as f2:
                parser.parse(f2)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== DTDHandler support

    class TestDTDHandler:
        # Records every notation and unparsed-entity declaration reported
        # to it, as tuples in arrival order.

        def __init__(self):
            self._notations = []
            self._entities = []

        def notationDecl(self, name, publicId, systemId):
            self._notations.append((name, publicId, systemId))

        def unparsedEntityDecl(self, name, publicId, systemId, ndata):
            self._entities.append((name, publicId, systemId, ndata))

    class TestEntityRecorder:
        # Entity resolver that records each (publicId, systemId) request
        # and answers with an InputSource carrying the same identifiers.

        def __init__(self):
            self.entities = []

        def resolveEntity(self, publicId, systemId):
            self.entities.append((publicId, systemId))
            source = InputSource()
            source.setPublicId(publicId)
            source.setSystemId(systemId)
            return source

    def test_expat_dtdhandler(self):
        # Declarations in the internal DTD subset must be reported to the
        # registered DTD handler.
        parser = create_parser()
        handler = self.TestDTDHandler()
        parser.setDTDHandler(handler)

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
        parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
        parser.feed(']>\n')
        parser.feed('<doc></doc>')
        parser.close()

        self.assertEqual(handler._notations,
            [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
        self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])

    def test_expat_external_dtd_enabled(self):
        # With external general entities enabled, the external DTD is
        # handed to the resolver and opening its unsupported URL fails.
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        with self.assertRaises(URLError):
            parser.feed(
                '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
            )
        self.assertEqual(
            resolver.entities, [(None, 'unsupported://non-existing')]
        )

    def test_expat_external_dtd_default(self):
        # Without enabling the feature, the resolver must never be asked
        # for the external DTD and parsing completes normally.
        parser = create_parser()
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        parser.feed(
            '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
        )
        parser.feed('<doc />')
        parser.close()
        self.assertEqual(resolver.entities, [])

    # ===== EntityResolver support

    class TestEntityResolver:
        # Resolves every external entity to the fixed document
        # b"<entity/>".

        def resolveEntity(self, publicId, systemId):
            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(b"<entity/>"))
            return inpsrc

    def test_expat_entityresolver_enabled(self):
        # With external general entities enabled, &test; is replaced by
        # the content supplied by the resolver.
        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc><entity></entity></doc>")

    def test_expat_entityresolver_default(self):
        # The feature must be off by default, in which case &test;
        # contributes nothing to the output.
        parser = create_parser()
        self.assertEqual(parser.getFeature(feature_external_ges), False)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc></doc>")

    # ===== Attributes support

    class AttrGatherer(ContentHandler):
        # Content handler that keeps the attributes object of the most
        # recently started element, in either namespace mode.

        def startElement(self, name, attrs):
            self._attrs = attrs

        def startElementNS(self, name, qname, attrs):
            self._attrs = attrs

    def test_expat_attrs_empty(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_attrs(gather._attrs)

    def test_expat_attrs_wattr(self):
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc attr='val'/>")
        parser.close()

        self.verify_attrs_wattr(gather._attrs)

    def test_expat_nsattrs_empty(self):
        # create_parser(1): namespace handling switched on.
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_nsattrs(gather._attrs)

    def test_expat_nsattrs_wattr(self):
        # create_parser(1): namespace handling switched on.
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
        parser.close()

        attrs = gather._attrs

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
        # Either no qnames or the single prefixed qname is accepted.
        self.assertIn(attrs.getQNames(), ([], ["ns:attr"]))
        self.assertEqual(len(attrs), 1)
        self.assertIn((ns_uri, "attr"), attrs)
        self.assertEqual(attrs.get((ns_uri, "attr")), "val")
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
        self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
        self.assertEqual(attrs[(ns_uri, "attr")], "val")

    # ===== InputSource support

    def test_expat_inpsource_filename(self):
        # parse() is given the file name itself.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_sysid(self):
        # parse() is given an InputSource whose system id is the file name.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(TEST_XMLFILE))

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_inpsource_sysid_nonascii(self):
        # Same as test_expat_inpsource_sysid with a non-ASCII file name.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(fname))

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_byte_stream(self):
        # parse() is given an InputSource wrapping a binary file object.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rb') as f:
            inpsrc.setByteStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_character_stream(self):
        # parse() is given an InputSource wrapping a text file object.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            inpsrc.setCharacterStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== IncrementalParser support

    def test_expat_incremental(self):
        # A document fed in two chunks must be parsed as one document.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc></doc>")

    def test_expat_incremental_reset(self):
        # After reset(), a half-fed document is abandoned and a fresh one
        # can be fed from the beginning.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("text")

        # Start over with a new output buffer and generator.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser.setContentHandler(xmlgen)
        parser.reset()

        parser.feed("<doc>")
        parser.feed("text")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

    # ===== Locator support

    def test_expat_locator_noinfo(self):
        # Data supplied through feed() has no system or public id.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(parser.getSystemId(), None)
        self.assertEqual(parser.getPublicId(), None)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_expat_locator_withinfo(self):
        # Parsing by file name: the system id is that file name.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
        self.assertEqual(parser.getPublicId(), None)

    @requires_nonascii_filenames
    def test_expat_locator_withinfo_nonascii(self):
        # Same as test_expat_locator_withinfo with a non-ASCII file name.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(fname)

        self.assertEqual(parser.getSystemId(), fname)
        self.assertEqual(parser.getPublicId(), None)
1257
Martin v. Löwis80670bc2000-10-06 21:13:23 +00001258
1259# ===========================================================================
1260#
1261# error reporting
1262#
1263# ===========================================================================
1264
class ErrorReportingTest(unittest.TestCase):
    # Checks what the expat reader and SAXParseException report when
    # parsing fails.

    def test_expat_inpsource_location(self):
        # The exception raised for ill-formed input must carry the system
        # id that was set on the InputSource.
        parser = create_parser()
        parser.setContentHandler(ContentHandler())  # do nothing
        source = InputSource()
        source.setByteStream(BytesIO(b"<foo bar foobar>"))  # ill-formed
        name = "a file name"
        source.setSystemId(name)
        # assertRaises fails the test with a descriptive message if no
        # SAXException is raised.
        with self.assertRaises(SAXException) as caught:
            parser.parse(source)
        self.assertEqual(caught.exception.getSystemId(), name)

    def test_expat_incomplete(self):
        # An unclosed element is a parse error; afterwards the parser's
        # position is checked against the end of the 5-character input.
        parser = create_parser()
        parser.setContentHandler(ContentHandler())  # do nothing
        self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
        self.assertEqual(parser.getColumnNumber(), 5)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_sax_parse_exception_str(self):
        # pass various values from a locator to the SAXParseException to
        # make sure that the __str__() doesn't fall apart when None is
        # passed instead of an integer line and column number
        #
        # In order: "normal" values, None for the line number, None for
        # the column number, None for both.  The test passes as long as
        # str() does not raise.
        positions = ((1, 1), (None, 1), (1, None), (None, None))
        for lineno, colno in positions:
            str(SAXParseException("message", None,
                                  self.DummyLocator(lineno, colno)))

    class DummyLocator:
        # Minimal locator with fixed public/system ids and a
        # caller-supplied line and column, either of which may be None.

        def __init__(self, lineno, colno):
            self._lineno = lineno
            self._colno = colno

        def getPublicId(self):
            return "pubid"

        def getSystemId(self):
            return "sysid"

        def getLineNumber(self):
            return self._lineno

        def getColumnNumber(self):
            return self._colno
Martin v. Löwis80670bc2000-10-06 21:13:23 +00001320
Lars Gustäbelab647872000-09-24 18:40:52 +00001321# ===========================================================================
1322#
1323# xmlreader tests
1324#
1325# ===========================================================================
1326
class XmlReaderTest(XmlTestBase):
    """Exercise the AttributesImpl and AttributesNSImpl containers.

    The verify_* helpers used below are not defined in this class;
    presumably they are inherited from XmlTestBase.
    """

    # ===== AttributesImpl
    def test_attrs_empty(self):
        empty = AttributesImpl({})
        self.verify_empty_attrs(empty)

    def test_attrs_wattr(self):
        single = AttributesImpl({"attr": "val"})
        self.verify_attrs_wattr(single)

    def test_nsattrs_empty(self):
        empty = AttributesNSImpl({}, {})
        self.verify_empty_nsattrs(empty)

    def test_nsattrs_wattr(self):
        # One namespaced attribute: (ns_uri, "attr") -> "val", with the
        # qualified name "ns:attr".
        name = (ns_uri, "attr")
        qname = "ns:attr"
        value = "val"
        ns_attrs = AttributesNSImpl({name: value}, {name: qname})

        # SAX-style accessors and the mapping protocol must agree.
        self.assertEqual(ns_attrs.getLength(), 1)
        self.assertEqual(ns_attrs.getNames(), [name])
        self.assertEqual(ns_attrs.getQNames(), [qname])
        self.assertEqual(len(ns_attrs), 1)
        self.assertIn(name, ns_attrs)
        self.assertEqual(list(ns_attrs.keys()), [name])
        self.assertEqual(ns_attrs.get(name), value)
        # The default must be ignored when the name is present.
        self.assertEqual(ns_attrs.get(name, 25), value)
        self.assertEqual(list(ns_attrs.items()), [(name, value)])
        self.assertEqual(list(ns_attrs.values()), [value])
        self.assertEqual(ns_attrs.getValue(name), value)
        self.assertEqual(ns_attrs.getValueByQName(qname), value)
        self.assertEqual(ns_attrs.getNameByQName(qname), name)
        self.assertEqual(ns_attrs[name], value)
        self.assertEqual(ns_attrs.getQNameByName(name), qname)
Fred Drake004d5e62000-10-23 17:22:08 +00001358
Lars Gustäbelab647872000-09-24 18:40:52 +00001359
Zackery Spytze28b8c92020-08-09 04:50:53 -06001360class LexicalHandlerTest(unittest.TestCase):
1361 def setUp(self):
1362 self.parser = None
1363
1364 self.specified_version = '1.0'
1365 self.specified_encoding = 'UTF-8'
1366 self.specified_doctype = 'wish'
1367 self.specified_entity_names = ('nbsp', 'source', 'target')
1368 self.specified_comment = ('Comment in a DTD',
1369 'Really! You think so?')
1370 self.test_data = StringIO()
1371 self.test_data.write('<?xml version="{}" encoding="{}"?>\n'.
1372 format(self.specified_version,
1373 self.specified_encoding))
1374 self.test_data.write('<!DOCTYPE {} [\n'.
1375 format(self.specified_doctype))
1376 self.test_data.write('<!-- {} -->\n'.
1377 format(self.specified_comment[0]))
1378 self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'.
1379 format(self.specified_doctype))
1380 self.test_data.write('<!ELEMENT to (#PCDATA)>\n')
1381 self.test_data.write('<!ELEMENT from (#PCDATA)>\n')
1382 self.test_data.write('<!ELEMENT heading (#PCDATA)>\n')
1383 self.test_data.write('<!ELEMENT body (#PCDATA)>\n')
1384 self.test_data.write('<!ELEMENT footer (#PCDATA)>\n')
1385 self.test_data.write('<!ENTITY {} "&#xA0;">\n'.
1386 format(self.specified_entity_names[0]))
1387 self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'.
1388 format(self.specified_entity_names[1]))
1389 self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'.
1390 format(self.specified_entity_names[2]))
1391 self.test_data.write(']>\n')
1392 self.test_data.write('<{}>'.format(self.specified_doctype))
1393 self.test_data.write('<to>Aristotle</to>\n')
1394 self.test_data.write('<from>Alexander</from>\n')
1395 self.test_data.write('<heading>Supplication</heading>\n')
1396 self.test_data.write('<body>Teach me patience!</body>\n')
1397 self.test_data.write('<footer>&{};&{};&{};</footer>\n'.
1398 format(self.specified_entity_names[1],
1399 self.specified_entity_names[0],
1400 self.specified_entity_names[2]))
1401 self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1]))
1402 self.test_data.write('</{}>\n'.format(self.specified_doctype))
1403 self.test_data.seek(0)
1404
1405 # Data received from handlers - to be validated
1406 self.version = None
1407 self.encoding = None
1408 self.standalone = None
1409 self.doctype = None
1410 self.publicID = None
1411 self.systemID = None
1412 self.end_of_dtd = False
1413 self.comments = []
1414
1415 def test_handlers(self):
1416 class TestLexicalHandler(LexicalHandler):
1417 def __init__(self, test_harness, *args, **kwargs):
1418 super().__init__(*args, **kwargs)
1419 self.test_harness = test_harness
1420
1421 def startDTD(self, doctype, publicID, systemID):
1422 self.test_harness.doctype = doctype
1423 self.test_harness.publicID = publicID
1424 self.test_harness.systemID = systemID
1425
1426 def endDTD(self):
1427 self.test_harness.end_of_dtd = True
1428
1429 def comment(self, text):
1430 self.test_harness.comments.append(text)
1431
1432 self.parser = create_parser()
1433 self.parser.setContentHandler(ContentHandler())
1434 self.parser.setProperty(
1435 'http://xml.org/sax/properties/lexical-handler',
1436 TestLexicalHandler(self))
1437 source = InputSource()
1438 source.setCharacterStream(self.test_data)
1439 self.parser.parse(source)
1440 self.assertEqual(self.doctype, self.specified_doctype)
1441 self.assertIsNone(self.publicID)
1442 self.assertIsNone(self.systemID)
1443 self.assertTrue(self.end_of_dtd)
1444 self.assertEqual(len(self.comments),
1445 len(self.specified_comment))
1446 self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
1447
1448
1449class CDATAHandlerTest(unittest.TestCase):
1450 def setUp(self):
1451 self.parser = None
1452 self.specified_chars = []
1453 self.specified_chars.append(('Parseable character data', False))
1454 self.specified_chars.append(('<> &% - assorted other XML junk.', True))
1455 self.char_index = 0 # Used to index specified results within handlers
1456 self.test_data = StringIO()
1457 self.test_data.write('<root_doc>\n')
1458 self.test_data.write('<some_pcdata>\n')
1459 self.test_data.write(f'{self.specified_chars[0][0]}\n')
1460 self.test_data.write('</some_pcdata>\n')
1461 self.test_data.write('<some_cdata>\n')
1462 self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n')
1463 self.test_data.write('</some_cdata>\n')
1464 self.test_data.write('</root_doc>\n')
1465 self.test_data.seek(0)
1466
1467 # Data received from handlers - to be validated
1468 self.chardata = []
1469 self.in_cdata = False
1470
1471 def test_handlers(self):
1472 class TestLexicalHandler(LexicalHandler):
1473 def __init__(self, test_harness, *args, **kwargs):
1474 super().__init__(*args, **kwargs)
1475 self.test_harness = test_harness
1476
1477 def startCDATA(self):
1478 self.test_harness.in_cdata = True
1479
1480 def endCDATA(self):
1481 self.test_harness.in_cdata = False
1482
1483 class TestCharHandler(ContentHandler):
1484 def __init__(self, test_harness, *args, **kwargs):
1485 super().__init__(*args, **kwargs)
1486 self.test_harness = test_harness
1487
1488 def characters(self, content):
1489 if content != '\n':
1490 h = self.test_harness
1491 t = h.specified_chars[h.char_index]
1492 h.assertEqual(t[0], content)
1493 h.assertEqual(t[1], h.in_cdata)
1494 h.char_index += 1
1495
1496 self.parser = create_parser()
1497 self.parser.setContentHandler(TestCharHandler(self))
1498 self.parser.setProperty(
1499 'http://xml.org/sax/properties/lexical-handler',
1500 TestLexicalHandler(self))
1501 source = InputSource()
1502 source.setCharacterStream(self.test_data)
1503 self.parser.parse(source)
1504
1505 self.assertFalse(self.in_cdata)
1506 self.assertEqual(self.char_index, 2)
1507
1508
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()