blob: b2b4dea060532d8cad2db4a0f29b154a8688c41c [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02005import os
Paul Monsonf3550692019-06-19 13:09:54 -07006import platform
Serhiy Storchakade5f9f42015-09-07 22:51:56 +03007import sys
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +01008import sysconfig
Guido van Rossumd8faa362007-04-27 19:54:29 +00009import unittest
Antoine Pitrou0ddbf472014-10-08 20:00:09 +020010import traceback
Guido van Rossumd8faa362007-04-27 19:54:29 +000011
Fred Drake7fbc85c2000-09-23 04:47:56 +000012from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +000013from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +000014
Zachary Ware38c707e2015-04-13 15:00:43 -050015from test.support import sortdict
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000016
17
class SetAttributeTest(unittest.TestCase):
    """Check the boolean configuration attributes of an xmlparser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_boolean_attribute(self, attr_name):
        """Assert that *attr_name* starts as False and stores bool(value)."""
        parser = self.parser
        self.assertIs(getattr(parser, attr_name), False)
        # The trailing 0 verifies that the flag can be switched off again.
        for value in (0, 1, 2, 0):
            setattr(parser, attr_name, value)
            self.assertIs(getattr(parser, attr_name), bool(value))

    def test_buffer_text(self):
        self._check_boolean_attribute('buffer_text')

    def test_namespace_prefixes(self):
        self._check_boolean_attribute('namespace_prefixes')

    def test_ordered_attributes(self):
        self._check_boolean_attribute('ordered_attributes')

    def test_specified_attributes(self):
        self._check_boolean_attribute('specified_attributes')

    def test_invalid_attributes(self):
        parser = self.parser
        with self.assertRaises(AttributeError):
            parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            parser.returns_unicode

        # Issue #25019: a non-string attribute name must raise TypeError.
        bad_name = range(0xF)
        self.assertRaises(TypeError, setattr, parser, bad_name, 0)
        self.assertRaises(TypeError, parser.__setattr__, bad_name, 0)
        self.assertRaises(TypeError, getattr, parser, bad_name)
Fred Drake8f42e2b2001-04-25 16:03:54 +000056
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000057
# Sample document (declared as iso-8859-1) that the tests below feed to the
# parser.  Written one source line per literal so every newline is explicit.
data = (
    b'<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>\n'
    b'<?xml-stylesheet href="stylesheet.css"?>\n'
    b'<!-- comment data -->\n'
    b'<!DOCTYPE quotations SYSTEM "quotations.dtd" [\n'
    b'<!ELEMENT root ANY>\n'
    b'<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>\n'
    b'<!NOTATION notation SYSTEM "notation.jpeg">\n'
    b'<!ENTITY acirc "&#226;">\n'
    b'<!ENTITY external_entity SYSTEM "entity.file">\n'
    b'<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>\n'
    b'%unparsed_entity;\n'
    b']>\n'
    b'\n'
    b'<root attr1="value1" attr2="value2&#8000;">\n'
    b'<myns:subelement xmlns:myns="http://www.python.org/namespace">\n'
    b' Contents of subelements\n'
    b'</myns:subelement>\n'
    b'<sub2><![CDATA[contents of CDATA section]]></sub2>\n'
    b'&external_entity;\n'
    b'&skipped_entity;\n'
    b'\xb5\n'
    b'</root>\n'
)
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000082
Guido van Rossumd8faa362007-04-27 19:54:29 +000083
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000084# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check the reported events."""

    class Outputter:
        """Set of parser callbacks that record each event in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails with ValueError unless exactly four
            # arguments were passed, so it doubles as an arity check.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # Arity check: exactly five arguments are expected.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # Arity check; the first argument (context) is not recorded.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Assert that *operations* is exactly the expected event sequence."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # Compare the complete lists: a zip()-based element-wise loop stops
        # at the shorter sequence and would hide missing or extra events.
        self.assertEqual(operations, expected_operations)

    def _make_parser_and_outputter(self):
        """Return a '!'-separated namespace parser wired to a new Outputter."""
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        return parser, out

    def test_parse_bytes(self):
        parser, out = self._make_parser_and_outputter()

        parser.Parse(data, True)

        self._verify_parse_output(out.out)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        parser, out = self._make_parser_and_outputter()

        parser.Parse(data.decode('iso-8859-1'), True)

        self._verify_parse_output(out.out)

    def test_parse_file(self):
        # Try parsing a file
        parser, out = self._make_parser_and_outputter()
        file = BytesIO(data)

        parser.ParseFile(file)

        self._verify_parse_output(out.out)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                         expat.errors.XML_ERROR_FINISHED)
274
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        for separator in (None, ' '):
            expat.ParserCreate(namespace_separator=separator)

    def test_illegal(self):
        with self.assertRaises(TypeError) as caught:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(
            str(caught.exception),
            "ParserCreate() argument 'namespace_separator' must be str or None, not int")

        with self.assertRaises(ValueError) as caught:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(caught.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        empty_separator = ''  # too short
        expat.ParserCreate(namespace_separator=empty_separator)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000308
Fred Drake2a3d7db2002-06-28 22:56:48 +0000309
class InterningTest(unittest.TestCase):
    """Check name interning and the external-entity parser with buffering."""

    def test(self):
        # Test the interning machinery.
        parser = expat.ParserCreate()
        seen_names = []

        def record_name(name, *args):
            seen_names.append(name)

        parser.StartElementHandler = record_name
        parser.EndElementHandler = record_name
        parser.Parse(b"<e> <e/> <e></e> </e>", True)
        first_name = seen_names[0]
        self.assertEqual(len(seen_names), 6)
        # Every callback should have received the very same string object.
        for name in seen_names:
            self.assertIs(first_name, name)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                sub_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = sub_parser.Parse(b"", True)
                return 1

        main_parser = expat.ParserCreate(namespace_separator='!')
        main_parser.buffer_text = 1
        recorder = ExternalOutputter(main_parser)
        main_parser.ExternalEntityRefHandler = recorder.ExternalEntityRefHandler
        main_parser.Parse(data, True)
        self.assertEqual(recorder.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000344
Guido van Rossumd8faa362007-04-27 19:54:29 +0000345
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text setting groups character data callbacks.

    Every handler defined here appends what it receives to ``self.stuff``;
    the tests compare that list with the expected event sequence.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is placed at the start of the failure message.
        """
        # Build a real list for the message: formatting a bare map() object
        # with %r would only show "<map object at 0x...>".
        expected_as_text = [str(item) for item in expected]
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, expected_as_text))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute switches buffering from inside the
        # callback while the parse is in progress.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install this object's methods named in *handlers* on the parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000439
Fred Draked7ea55b2004-08-13 03:09:07 +0000440
441# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised by a callback propagates from Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        """Assert that a traceback *entry* names *filename* and *funcname*.

        Only the base name of the entry's file path is compared.
        """
        self.assertEqual(os.path.basename(entry[0]), filename)
        self.assertEqual(entry[2], funcname)

    def test_exception(self):
        # Derive the expected file name from this module instead of
        # hard-coding it, so the check survives a rename of the file.
        this_file = os.path.basename(__file__)
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # A plain try/except is used on purpose: assertRaises() stores the
        # exception without its traceback, which is inspected below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", True)
        except RuntimeError as e:
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but" + \
                             " found %r" % e.args[0])
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            self.check_traceback_entry(entries[0],
                                       this_file, "test_exception")
            self.check_traceback_entry(entries[1],
                                       "pyexpat.c", "StartElement")
            self.check_traceback_entry(entries[2],
                                       this_file, "StartElementHandler")
            if sysconfig.is_python_build() and not (sys.platform == 'win32' and platform.machine() == 'ARM'):
                self.assertIn('call_with_frame("StartElement"', entries[1][3])
        else:
            self.fail()
Fred Draked7ea55b2004-08-13 03:09:07 +0000471
Dave Cole3203efb2004-08-26 00:37:31 +0000472
473# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the parser's Current* position attributes inside callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the current parser position with the next expected entry.

        *event* is 's' for a start tag and 'e' for an end tag.  Each entry
        of ``self.expected_list`` is (event, byte index, line, column).
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The expected value goes first so the message reads correctly.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)
        # Without this check the test would pass even if the parser
        # reported fewer events than expected.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000503
504
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect the handler's own exception: asserting a bare Exception
        # would also accept an unrelated failure such as a parse error.
        with self.assertRaises(SpecificException):
            parser.Parse(xml.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000523
Christian Heimes2380ac72008-01-09 00:17:24 +0000524class ChardataBufferTest(unittest.TestCase):
525 """
526 test setting of chardata buffer size
527 """
528
529 def test_1025_bytes(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000530 self.assertEqual(self.small_buffer_test(1025), 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000531
532 def test_1000_bytes(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000533 self.assertEqual(self.small_buffer_test(1000), 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000534
535 def test_wrong_size(self):
536 parser = expat.ParserCreate()
537 parser.buffer_text = 1
Serhiy Storchaka931331a2015-09-07 22:37:02 +0300538 with self.assertRaises(ValueError):
539 parser.buffer_size = -1
540 with self.assertRaises(ValueError):
541 parser.buffer_size = 0
Serhiy Storchakade5f9f42015-09-07 22:51:56 +0300542 with self.assertRaises((ValueError, OverflowError)):
543 parser.buffer_size = sys.maxsize + 1
Serhiy Storchaka931331a2015-09-07 22:37:02 +0300544 with self.assertRaises(TypeError):
545 parser.buffer_size = 512.0
Christian Heimes2380ac72008-01-09 00:17:24 +0000546
547 def test_unchanged_size(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200548 xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
549 xml2 = b'a'*512 + b'</s>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000550 parser = expat.ParserCreate()
551 parser.CharacterDataHandler = self.counting_handler
552 parser.buffer_size = 512
553 parser.buffer_text = 1
554
555 # Feed 512 bytes of character data: the handler should be called
556 # once.
557 self.n = 0
558 parser.Parse(xml1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000559 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000560
561 # Reassign to buffer_size, but assign the same size.
562 parser.buffer_size = parser.buffer_size
Ezio Melottib3aedd42010-11-20 19:04:17 +0000563 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000564
565 # Try parsing rest of the document
566 parser.Parse(xml2)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000567 self.assertEqual(self.n, 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000568
569
570 def test_disabling_buffer(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200571 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
572 xml2 = b'b' * 1024
573 xml3 = b'c' * 1024 + b'</a>';
Christian Heimes2380ac72008-01-09 00:17:24 +0000574 parser = expat.ParserCreate()
575 parser.CharacterDataHandler = self.counting_handler
576 parser.buffer_text = 1
577 parser.buffer_size = 1024
Ezio Melottib3aedd42010-11-20 19:04:17 +0000578 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000579
580 # Parse one chunk of XML
581 self.n = 0
Serhiy Storchakaeb897462019-09-01 12:11:43 +0300582 parser.Parse(xml1, False)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000583 self.assertEqual(parser.buffer_size, 1024)
584 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000585
586 # Turn off buffering and parse the next chunk.
587 parser.buffer_text = 0
588 self.assertFalse(parser.buffer_text)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000589 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000590 for i in range(10):
Serhiy Storchakaeb897462019-09-01 12:11:43 +0300591 parser.Parse(xml2, False)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000592 self.assertEqual(self.n, 11)
Christian Heimes2380ac72008-01-09 00:17:24 +0000593
594 parser.buffer_text = 1
595 self.assertTrue(parser.buffer_text)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000596 self.assertEqual(parser.buffer_size, 1024)
Serhiy Storchakaeb897462019-09-01 12:11:43 +0300597 parser.Parse(xml3, True)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000598 self.assertEqual(self.n, 12)
Christian Heimes2380ac72008-01-09 00:17:24 +0000599
Christian Heimes2380ac72008-01-09 00:17:24 +0000600 def counting_handler(self, text):
601 self.n += 1
602
603 def small_buffer_test(self, buffer_len):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200604 xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000605 parser = expat.ParserCreate()
606 parser.CharacterDataHandler = self.counting_handler
607 parser.buffer_size = 1024
608 parser.buffer_text = 1
609
610 self.n = 0
611 parser.Parse(xml)
612 return self.n
613
def test_change_size_1(self):
    # Double buffer_size between two Parse() calls on the same parser and
    # check how many character-data callbacks were counted overall.
    prolog = b"<?xml version='1.0' encoding='iso8859'?><a><s>"
    first_chunk = prolog + b'a' * 1024
    second_chunk = b''.join((b'aaa</s><s>', b'a' * 1025, b'</s></a>'))

    p = expat.ParserCreate()
    p.CharacterDataHandler = self.counting_handler
    p.buffer_text = 1
    p.buffer_size = 1024
    self.assertEqual(p.buffer_size, 1024)

    self.n = 0
    p.Parse(first_chunk, False)
    # Grow the buffer while the first <s> element is still open.
    p.buffer_size = p.buffer_size * 2
    self.assertEqual(p.buffer_size, 2048)
    p.Parse(second_chunk, True)
    self.assertEqual(self.n, 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000629
def test_change_size_2(self):
    # Halve buffer_size between two Parse() calls on the same parser and
    # check how many character-data callbacks were counted overall.
    prolog = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>"
    first_chunk = prolog + b'a' * 1023
    second_chunk = b''.join((b'aaa</s><s>', b'a' * 1025, b'</s></a>'))

    p = expat.ParserCreate()
    p.CharacterDataHandler = self.counting_handler
    p.buffer_text = 1
    p.buffer_size = 2048
    self.assertEqual(p.buffer_size, 2048)

    self.n = 0
    p.Parse(first_chunk, False)
    # Shrink the buffer while the first <s> element is still open.
    p.buffer_size //= 2
    self.assertEqual(p.buffer_size, 1024)
    p.Parse(second_chunk, True)
    self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000645
class MalformedInputTest(unittest.TestCase):
    # Checks the ExpatError raised when Parse() is fed input that is not
    # well-formed XML.

    def test1(self):
        # A NUL byte followed by CRLF must be rejected with the exact
        # message asserted below.
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        # assertRaises reports a descriptive failure if nothing is raised,
        # unlike a bare self.fail() inside try/except.
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # Only the line number is pinned; the column is matched loosely.
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000663
class ErrorMessageTest(unittest.TestCase):
    # Checks the relationship between expat.errors constants, the
    # errors.codes / errors.messages mappings and ExpatError.code.

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        syntax_message = errors.XML_ERROR_SYNTAX
        self.assertEqual(syntax_message,
                         errors.messages[errors.codes[syntax_message]])

    def test_expaterror(self):
        # A lone '<' as the final chunk must raise ExpatError whose code
        # is the one registered for XML_ERROR_UNCLOSED_TOKEN.
        xml = b'<'
        parser = expat.ParserCreate()
        # assertRaises reports a descriptive failure if nothing is raised,
        # unlike a bare self.fail() inside try/except.
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000679
680
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def _collect_entity_ids(self, document, *use_foreign_dtd_args):
        # Parse *document* with a new parser on which
        # UseForeignDTD(*use_foreign_dtd_args) was called, and return the
        # (public_id, system_id) pairs that were passed to the
        # ExternalEntityRefHandler, in call order.
        seen_ids = []

        def resolve_entity(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*use_foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return seen_ids

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._collect_entity_ids(document, True),
                         [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._collect_entity_ids(document),
                         [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._collect_entity_ids(document, True),
                         [("bar", "baz")])
731
732
# Hand control to unittest's command-line runner when executed as a script.
if __name__ == '__main__':
    unittest.main()