blob: 92fffc45f30a964fb8ee59df03a05ce04985aa8b [file] [log] [blame]
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +00001# XXX TypeErrors on calling handlers, or on bad return values from a
2# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02005import os
Serhiy Storchakade5f9f42015-09-07 22:51:56 +03006import sys
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +01007import sysconfig
Guido van Rossumd8faa362007-04-27 19:54:29 +00008import unittest
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02009import traceback
Guido van Rossumd8faa362007-04-27 19:54:29 +000010
Fred Drake7fbc85c2000-09-23 04:47:56 +000011from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +000012from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +000013
Zachary Ware38c707e2015-04-13 15:00:43 -050014from test.support import sortdict
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000015
16
Guido van Rossumd8faa362007-04-27 19:54:29 +000017class SetAttributeTest(unittest.TestCase):
18 def setUp(self):
19 self.parser = expat.ParserCreate(namespace_separator='!')
Serhiy Storchaka931331a2015-09-07 22:37:02 +030020
21 def test_buffer_text(self):
22 self.assertIs(self.parser.buffer_text, False)
23 for x in 0, 1, 2, 0:
24 self.parser.buffer_text = x
25 self.assertIs(self.parser.buffer_text, bool(x))
26
27 def test_namespace_prefixes(self):
28 self.assertIs(self.parser.namespace_prefixes, False)
29 for x in 0, 1, 2, 0:
30 self.parser.namespace_prefixes = x
31 self.assertIs(self.parser.namespace_prefixes, bool(x))
Fred Drake265a8042000-09-21 20:32:13 +000032
Guido van Rossumd8faa362007-04-27 19:54:29 +000033 def test_ordered_attributes(self):
Serhiy Storchaka931331a2015-09-07 22:37:02 +030034 self.assertIs(self.parser.ordered_attributes, False)
35 for x in 0, 1, 2, 0:
Guido van Rossumd8faa362007-04-27 19:54:29 +000036 self.parser.ordered_attributes = x
Serhiy Storchaka931331a2015-09-07 22:37:02 +030037 self.assertIs(self.parser.ordered_attributes, bool(x))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000038
Guido van Rossumd8faa362007-04-27 19:54:29 +000039 def test_specified_attributes(self):
Serhiy Storchaka931331a2015-09-07 22:37:02 +030040 self.assertIs(self.parser.specified_attributes, False)
41 for x in 0, 1, 2, 0:
Guido van Rossumd8faa362007-04-27 19:54:29 +000042 self.parser.specified_attributes = x
Serhiy Storchaka931331a2015-09-07 22:37:02 +030043 self.assertIs(self.parser.specified_attributes, bool(x))
44
Serhiy Storchaka931331a2015-09-07 22:37:02 +030045 def test_invalid_attributes(self):
46 with self.assertRaises(AttributeError):
47 self.parser.returns_unicode = 1
48 with self.assertRaises(AttributeError):
49 self.parser.returns_unicode
50
51 # Issue #25019
52 self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
53 self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
54 self.assertRaises(TypeError, getattr, self.parser, range(0xF))
Fred Drake8f42e2b2001-04-25 16:03:54 +000055
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000056
Guido van Rossum4ca94712007-07-23 17:42:32 +000057data = b'''\
Fred Drake265a8042000-09-21 20:32:13 +000058<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000059<?xml-stylesheet href="stylesheet.css"?>
60<!-- comment data -->
61<!DOCTYPE quotations SYSTEM "quotations.dtd" [
62<!ELEMENT root ANY>
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +000063<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000064<!NOTATION notation SYSTEM "notation.jpeg">
65<!ENTITY acirc "&#226;">
66<!ENTITY external_entity SYSTEM "entity.file">
67<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
68%unparsed_entity;
69]>
70
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000071<root attr1="value1" attr2="value2&#8000;">
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000072<myns:subelement xmlns:myns="http://www.python.org/namespace">
73 Contents of subelements
74</myns:subelement>
75<sub2><![CDATA[contents of CDATA section]]></sub2>
76&external_entity;
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +000077&skipped_entity;
Serhiy Storchaka43536e92013-02-04 18:26:15 +020078\xb5
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000079</root>
Fred Drake265a8042000-09-21 20:32:13 +000080'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000081
Guido van Rossumd8faa362007-04-27 19:54:29 +000082
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000083# Produce UTF-8 output
Guido van Rossumd8faa362007-04-27 19:54:29 +000084class ParseTest(unittest.TestCase):
85 class Outputter:
86 def __init__(self):
87 self.out = []
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000088
Guido van Rossumd8faa362007-04-27 19:54:29 +000089 def StartElementHandler(self, name, attrs):
90 self.out.append('Start element: ' + repr(name) + ' ' +
91 sortdict(attrs))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000092
Guido van Rossumd8faa362007-04-27 19:54:29 +000093 def EndElementHandler(self, name):
94 self.out.append('End element: ' + repr(name))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000095
Guido van Rossumd8faa362007-04-27 19:54:29 +000096 def CharacterDataHandler(self, data):
97 data = data.strip()
98 if data:
99 self.out.append('Character data: ' + repr(data))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000100
Guido van Rossumd8faa362007-04-27 19:54:29 +0000101 def ProcessingInstructionHandler(self, target, data):
102 self.out.append('PI: ' + repr(target) + ' ' + repr(data))
103
104 def StartNamespaceDeclHandler(self, prefix, uri):
105 self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))
106
107 def EndNamespaceDeclHandler(self, prefix):
108 self.out.append('End of NS decl: ' + repr(prefix))
109
110 def StartCdataSectionHandler(self):
111 self.out.append('Start of CDATA section')
112
113 def EndCdataSectionHandler(self):
114 self.out.append('End of CDATA section')
115
116 def CommentHandler(self, text):
117 self.out.append('Comment: ' + repr(text))
118
119 def NotationDeclHandler(self, *args):
120 name, base, sysid, pubid = args
121 self.out.append('Notation declared: %s' %(args,))
122
123 def UnparsedEntityDeclHandler(self, *args):
124 entityName, base, systemId, publicId, notationName = args
125 self.out.append('Unparsed entity decl: %s' %(args,))
126
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000127 def NotStandaloneHandler(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000128 self.out.append('Not standalone')
129 return 1
130
131 def ExternalEntityRefHandler(self, *args):
132 context, base, sysId, pubId = args
133 self.out.append('External entity ref: %s' %(args[1:],))
134 return 1
135
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000136 def StartDoctypeDeclHandler(self, *args):
137 self.out.append(('Start doctype', args))
138 return 1
139
140 def EndDoctypeDeclHandler(self):
141 self.out.append("End doctype")
142 return 1
143
144 def EntityDeclHandler(self, *args):
145 self.out.append(('Entity declaration', args))
146 return 1
147
148 def XmlDeclHandler(self, *args):
149 self.out.append(('XML declaration', args))
150 return 1
151
152 def ElementDeclHandler(self, *args):
153 self.out.append(('Element declaration', args))
154 return 1
155
156 def AttlistDeclHandler(self, *args):
157 self.out.append(('Attribute list declaration', args))
158 return 1
159
160 def SkippedEntityHandler(self, *args):
161 self.out.append(("Skipped entity", args))
162 return 1
163
Guido van Rossumd8faa362007-04-27 19:54:29 +0000164 def DefaultHandler(self, userData):
165 pass
166
167 def DefaultHandlerExpand(self, userData):
168 pass
169
170 handler_names = [
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000171 'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
172 'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
173 'NotationDeclHandler', 'StartNamespaceDeclHandler',
174 'EndNamespaceDeclHandler', 'CommentHandler',
175 'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
176 'DefaultHandlerExpand', 'NotStandaloneHandler',
177 'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
178 'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
179 'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
Guido van Rossumd8faa362007-04-27 19:54:29 +0000180 ]
181
Antoine Pitrou452196f2011-01-05 18:44:14 +0000182 def _hookup_callbacks(self, parser, handler):
183 """
184 Set each of the callbacks defined on handler and named in
185 self.handler_names on the given parser.
186 """
187 for name in self.handler_names:
188 setattr(parser, name, getattr(handler, name))
189
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000190 def _verify_parse_output(self, operations):
191 expected_operations = [
192 ('XML declaration', ('1.0', 'iso-8859-1', 0)),
193 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
194 "Comment: ' comment data '",
195 "Not standalone",
196 ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
197 ('Element declaration', ('root', (2, 0, None, ()))),
198 ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
199 1)),
200 ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
201 0)),
202 "Notation declared: ('notation', None, 'notation.jpeg', None)",
203 ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
204 ('Entity declaration', ('external_entity', 0, None, None,
205 'entity.file', None, None)),
206 "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
207 "Not standalone",
208 "End doctype",
209 "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
210 "NS decl: 'myns' 'http://www.python.org/namespace'",
211 "Start element: 'http://www.python.org/namespace!subelement' {}",
212 "Character data: 'Contents of subelements'",
213 "End element: 'http://www.python.org/namespace!subelement'",
214 "End of NS decl: 'myns'",
215 "Start element: 'sub2' {}",
216 'Start of CDATA section',
217 "Character data: 'contents of CDATA section'",
218 'End of CDATA section',
219 "End element: 'sub2'",
220 "External entity ref: (None, 'entity.file', None)",
221 ('Skipped entity', ('skipped_entity', 0)),
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200222 "Character data: '\xb5'",
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000223 "End element: 'root'",
224 ]
225 for operation, expected_operation in zip(operations, expected_operations):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000226 self.assertEqual(operation, expected_operation)
Guido van Rossum4ca94712007-07-23 17:42:32 +0000227
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200228 def test_parse_bytes(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000229 out = self.Outputter()
230 parser = expat.ParserCreate(namespace_separator='!')
Antoine Pitrou452196f2011-01-05 18:44:14 +0000231 self._hookup_callbacks(parser, out)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000232
233 parser.Parse(data, 1)
234
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000235 operations = out.out
236 self._verify_parse_output(operations)
Alexander Belopolskye239d232010-12-08 23:31:48 +0000237 # Issue #6697.
238 self.assertRaises(AttributeError, getattr, parser, '\uD800')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000239
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200240 def test_parse_str(self):
241 out = self.Outputter()
242 parser = expat.ParserCreate(namespace_separator='!')
243 self._hookup_callbacks(parser, out)
244
245 parser.Parse(data.decode('iso-8859-1'), 1)
246
247 operations = out.out
248 self._verify_parse_output(operations)
249
Guido van Rossumd8faa362007-04-27 19:54:29 +0000250 def test_parse_file(self):
251 # Try parsing a file
252 out = self.Outputter()
253 parser = expat.ParserCreate(namespace_separator='!')
Antoine Pitrou452196f2011-01-05 18:44:14 +0000254 self._hookup_callbacks(parser, out)
Guido van Rossum4ca94712007-07-23 17:42:32 +0000255 file = BytesIO(data)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000256
257 parser.ParseFile(file)
258
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000259 operations = out.out
260 self._verify_parse_output(operations)
Fred Drake1e0611b2000-12-23 22:12:07 +0000261
Ned Deilye7d532f2014-03-27 16:39:58 -0700262 def test_parse_again(self):
263 parser = expat.ParserCreate()
264 file = BytesIO(data)
265 parser.ParseFile(file)
266 # Issue 6676: ensure a meaningful exception is raised when attempting
267 # to parse more than one XML document per xmlparser instance,
268 # a limitation of the Expat library.
269 with self.assertRaises(expat.error) as cm:
270 parser.ParseFile(file)
271 self.assertEqual(expat.ErrorString(cm.exception.code),
272 expat.errors.XML_ERROR_FINISHED)
273
Guido van Rossumd8faa362007-04-27 19:54:29 +0000274class NamespaceSeparatorTest(unittest.TestCase):
275 def test_legal(self):
276 # Tests that make sure we get errors when the namespace_separator value
277 # is illegal, and that we don't for good values:
278 expat.ParserCreate()
279 expat.ParserCreate(namespace_separator=None)
280 expat.ParserCreate(namespace_separator=' ')
Fred Drake8f42e2b2001-04-25 16:03:54 +0000281
Guido van Rossumd8faa362007-04-27 19:54:29 +0000282 def test_illegal(self):
283 try:
284 expat.ParserCreate(namespace_separator=42)
285 self.fail()
286 except TypeError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000287 self.assertEqual(str(e),
Victor Stinner3c9e6e92010-06-24 22:31:12 +0000288 'ParserCreate() argument 2 must be str or None, not int')
Fred Drake8f42e2b2001-04-25 16:03:54 +0000289
Guido van Rossumd8faa362007-04-27 19:54:29 +0000290 try:
291 expat.ParserCreate(namespace_separator='too long')
292 self.fail()
293 except ValueError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000294 self.assertEqual(str(e),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000295 'namespace_separator must be at most one character, omitted, or None')
Fred Drake1add0232002-06-27 19:41:51 +0000296
Guido van Rossumd8faa362007-04-27 19:54:29 +0000297 def test_zero_length(self):
298 # ParserCreate() needs to accept a namespace_separator of zero length
299 # to satisfy the requirements of RDF applications that are required
300 # to simply glue together the namespace URI and the localname. Though
301 # considered a wart of the RDF specifications, it needs to be supported.
302 #
303 # See XML-SIG mailing list thread starting with
304 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
305 #
306 expat.ParserCreate(namespace_separator='') # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000307
Fred Drake2a3d7db2002-06-28 22:56:48 +0000308
Guido van Rossumd8faa362007-04-27 19:54:29 +0000309class InterningTest(unittest.TestCase):
310 def test(self):
311 # Test the interning machinery.
312 p = expat.ParserCreate()
313 L = []
314 def collector(name, *args):
315 L.append(name)
316 p.StartElementHandler = collector
317 p.EndElementHandler = collector
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200318 p.Parse(b"<e> <e/> <e></e> </e>", 1)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000319 tag = L[0]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000320 self.assertEqual(len(L), 6)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000321 for entry in L:
322 # L should have the same string repeated over and over.
323 self.assertTrue(tag is entry)
324
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000325 def test_issue9402(self):
326 # create an ExternalEntityParserCreate with buffer text
327 class ExternalOutputter:
328 def __init__(self, parser):
329 self.parser = parser
330 self.parser_result = None
331
332 def ExternalEntityRefHandler(self, context, base, sysId, pubId):
333 external_parser = self.parser.ExternalEntityParserCreate("")
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200334 self.parser_result = external_parser.Parse(b"", 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000335 return 1
336
337 parser = expat.ParserCreate(namespace_separator='!')
338 parser.buffer_text = 1
339 out = ExternalOutputter(parser)
340 parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
341 parser.Parse(data, 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000342 self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000343
Guido van Rossumd8faa362007-04-27 19:54:29 +0000344
345class BufferTextTest(unittest.TestCase):
346 def setUp(self):
Fred Drake2a3d7db2002-06-28 22:56:48 +0000347 self.stuff = []
Guido van Rossumd8faa362007-04-27 19:54:29 +0000348 self.parser = expat.ParserCreate()
349 self.parser.buffer_text = 1
350 self.parser.CharacterDataHandler = self.CharacterDataHandler
Fred Drake2a3d7db2002-06-28 22:56:48 +0000351
352 def check(self, expected, label):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000353 self.assertEqual(self.stuff, expected,
Walter Dörwald70a6b492004-02-12 17:35:32 +0000354 "%s\nstuff = %r\nexpected = %r"
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000355 % (label, self.stuff, map(str, expected)))
Fred Drake2a3d7db2002-06-28 22:56:48 +0000356
357 def CharacterDataHandler(self, text):
358 self.stuff.append(text)
359
360 def StartElementHandler(self, name, attrs):
361 self.stuff.append("<%s>" % name)
362 bt = attrs.get("buffer-text")
363 if bt == "yes":
Guido van Rossumd8faa362007-04-27 19:54:29 +0000364 self.parser.buffer_text = 1
Fred Drake2a3d7db2002-06-28 22:56:48 +0000365 elif bt == "no":
Guido van Rossumd8faa362007-04-27 19:54:29 +0000366 self.parser.buffer_text = 0
Fred Drake2a3d7db2002-06-28 22:56:48 +0000367
368 def EndElementHandler(self, name):
369 self.stuff.append("</%s>" % name)
370
371 def CommentHandler(self, data):
372 self.stuff.append("<!--%s-->" % data)
373
Guido van Rossumd8faa362007-04-27 19:54:29 +0000374 def setHandlers(self, handlers=[]):
375 for name in handlers:
376 setattr(self.parser, name, getattr(self, name))
Fred Drake2a3d7db2002-06-28 22:56:48 +0000377
Guido van Rossumd8faa362007-04-27 19:54:29 +0000378 def test_default_to_disabled(self):
379 parser = expat.ParserCreate()
380 self.assertFalse(parser.buffer_text)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000381
Guido van Rossumd8faa362007-04-27 19:54:29 +0000382 def test_buffering_enabled(self):
383 # Make sure buffering is turned on
384 self.assertTrue(self.parser.buffer_text)
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200385 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000386 self.assertEqual(self.stuff, ['123'],
387 "buffered text not properly collapsed")
Fred Drake2a3d7db2002-06-28 22:56:48 +0000388
Guido van Rossumd8faa362007-04-27 19:54:29 +0000389 def test1(self):
390 # XXX This test exposes more detail of Expat's text chunking than we
391 # XXX like, but it tests what we need to concisely.
392 self.setHandlers(["StartElementHandler"])
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200393 self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000394 self.assertEqual(self.stuff,
395 ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
396 "buffering control not reacting as expected")
Fred Drake2a3d7db2002-06-28 22:56:48 +0000397
Guido van Rossumd8faa362007-04-27 19:54:29 +0000398 def test2(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200399 self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000400 self.assertEqual(self.stuff, ["1<2> \n 3"],
401 "buffered text not properly collapsed")
Fred Drake2a3d7db2002-06-28 22:56:48 +0000402
Guido van Rossumd8faa362007-04-27 19:54:29 +0000403 def test3(self):
404 self.setHandlers(["StartElementHandler"])
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200405 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000406 self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
407 "buffered text not properly split")
Fred Drake2a3d7db2002-06-28 22:56:48 +0000408
Guido van Rossumd8faa362007-04-27 19:54:29 +0000409 def test4(self):
410 self.setHandlers(["StartElementHandler", "EndElementHandler"])
411 self.parser.CharacterDataHandler = None
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200412 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000413 self.assertEqual(self.stuff,
414 ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
Fred Drake2a3d7db2002-06-28 22:56:48 +0000415
Guido van Rossumd8faa362007-04-27 19:54:29 +0000416 def test5(self):
417 self.setHandlers(["StartElementHandler", "EndElementHandler"])
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200418 self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000419 self.assertEqual(self.stuff,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000420 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
Fred Drake2a3d7db2002-06-28 22:56:48 +0000421
Guido van Rossumd8faa362007-04-27 19:54:29 +0000422 def test6(self):
423 self.setHandlers(["CommentHandler", "EndElementHandler",
424 "StartElementHandler"])
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200425 self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000426 self.assertEqual(self.stuff,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000427 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
428 "buffered text not properly split")
Fred Drake2a3d7db2002-06-28 22:56:48 +0000429
Guido van Rossumd8faa362007-04-27 19:54:29 +0000430 def test7(self):
431 self.setHandlers(["CommentHandler", "EndElementHandler",
432 "StartElementHandler"])
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200433 self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000434 self.assertEqual(self.stuff,
435 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
436 "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
437 "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000438
Fred Draked7ea55b2004-08-13 03:09:07 +0000439
440# Test handling of exception from callback:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000441class HandlerExceptionTest(unittest.TestCase):
442 def StartElementHandler(self, name, attrs):
443 raise RuntimeError(name)
Fred Draked7ea55b2004-08-13 03:09:07 +0000444
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200445 def check_traceback_entry(self, entry, filename, funcname):
446 self.assertEqual(os.path.basename(entry[0]), filename)
447 self.assertEqual(entry[2], funcname)
448
449 def test_exception(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000450 parser = expat.ParserCreate()
451 parser.StartElementHandler = self.StartElementHandler
452 try:
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200453 parser.Parse(b"<a><b><c/></b></a>", 1)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000454 self.fail()
455 except RuntimeError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000456 self.assertEqual(e.args[0], 'a',
457 "Expected RuntimeError for element 'a', but" + \
458 " found %r" % e.args[0])
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200459 # Check that the traceback contains the relevant line in pyexpat.c
460 entries = traceback.extract_tb(e.__traceback__)
461 self.assertEqual(len(entries), 3)
462 self.check_traceback_entry(entries[0],
463 "test_pyexpat.py", "test_exception")
464 self.check_traceback_entry(entries[1],
465 "pyexpat.c", "StartElement")
466 self.check_traceback_entry(entries[2],
467 "test_pyexpat.py", "StartElementHandler")
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +0100468 if sysconfig.is_python_build():
469 self.assertIn('call_with_frame("StartElement"', entries[1][3])
Fred Draked7ea55b2004-08-13 03:09:07 +0000470
Dave Cole3203efb2004-08-26 00:37:31 +0000471
472# Test Current* members:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000473class PositionTest(unittest.TestCase):
Dave Cole3203efb2004-08-26 00:37:31 +0000474 def StartElementHandler(self, name, attrs):
475 self.check_pos('s')
476
477 def EndElementHandler(self, name):
478 self.check_pos('e')
479
480 def check_pos(self, event):
481 pos = (event,
482 self.parser.CurrentByteIndex,
483 self.parser.CurrentLineNumber,
484 self.parser.CurrentColumnNumber)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000485 self.assertTrue(self.upto < len(self.expected_list),
486 'too many parser events')
Dave Cole3203efb2004-08-26 00:37:31 +0000487 expected = self.expected_list[self.upto]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000488 self.assertEqual(pos, expected,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000489 'Expected position %s, got position %s' %(pos, expected))
Dave Cole3203efb2004-08-26 00:37:31 +0000490 self.upto += 1
491
Guido van Rossumd8faa362007-04-27 19:54:29 +0000492 def test(self):
493 self.parser = expat.ParserCreate()
494 self.parser.StartElementHandler = self.StartElementHandler
495 self.parser.EndElementHandler = self.EndElementHandler
496 self.upto = 0
497 self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
498 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
Dave Cole3203efb2004-08-26 00:37:31 +0000499
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200500 xml = b'<a>\n <b>\n <c/>\n </b>\n</a>'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000501 self.parser.Parse(xml, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000502
503
Guido van Rossumd8faa362007-04-27 19:54:29 +0000504class sf1296433Test(unittest.TestCase):
505 def test_parse_only_xml_data(self):
506 # http://python.org/sf/1296433
507 #
508 xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
509 # this one doesn't crash
510 #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000511
Guido van Rossumd8faa362007-04-27 19:54:29 +0000512 class SpecificException(Exception):
513 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000514
Guido van Rossumd8faa362007-04-27 19:54:29 +0000515 def handler(text):
516 raise SpecificException
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000517
Guido van Rossumd8faa362007-04-27 19:54:29 +0000518 parser = expat.ParserCreate()
519 parser.CharacterDataHandler = handler
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000520
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200521 self.assertRaises(Exception, parser.Parse, xml.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000522
Christian Heimes2380ac72008-01-09 00:17:24 +0000523class ChardataBufferTest(unittest.TestCase):
524 """
525 test setting of chardata buffer size
526 """
527
528 def test_1025_bytes(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000529 self.assertEqual(self.small_buffer_test(1025), 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000530
531 def test_1000_bytes(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000532 self.assertEqual(self.small_buffer_test(1000), 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000533
534 def test_wrong_size(self):
535 parser = expat.ParserCreate()
536 parser.buffer_text = 1
Serhiy Storchaka931331a2015-09-07 22:37:02 +0300537 with self.assertRaises(ValueError):
538 parser.buffer_size = -1
539 with self.assertRaises(ValueError):
540 parser.buffer_size = 0
Serhiy Storchakade5f9f42015-09-07 22:51:56 +0300541 with self.assertRaises((ValueError, OverflowError)):
542 parser.buffer_size = sys.maxsize + 1
Serhiy Storchaka931331a2015-09-07 22:37:02 +0300543 with self.assertRaises(TypeError):
544 parser.buffer_size = 512.0
Christian Heimes2380ac72008-01-09 00:17:24 +0000545
546 def test_unchanged_size(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200547 xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
548 xml2 = b'a'*512 + b'</s>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000549 parser = expat.ParserCreate()
550 parser.CharacterDataHandler = self.counting_handler
551 parser.buffer_size = 512
552 parser.buffer_text = 1
553
554 # Feed 512 bytes of character data: the handler should be called
555 # once.
556 self.n = 0
557 parser.Parse(xml1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000558 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000559
560 # Reassign to buffer_size, but assign the same size.
561 parser.buffer_size = parser.buffer_size
Ezio Melottib3aedd42010-11-20 19:04:17 +0000562 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000563
564 # Try parsing rest of the document
565 parser.Parse(xml2)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000566 self.assertEqual(self.n, 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000567
568
569 def test_disabling_buffer(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200570 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
571 xml2 = b'b' * 1024
572 xml3 = b'c' * 1024 + b'</a>';
Christian Heimes2380ac72008-01-09 00:17:24 +0000573 parser = expat.ParserCreate()
574 parser.CharacterDataHandler = self.counting_handler
575 parser.buffer_text = 1
576 parser.buffer_size = 1024
Ezio Melottib3aedd42010-11-20 19:04:17 +0000577 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000578
579 # Parse one chunk of XML
580 self.n = 0
581 parser.Parse(xml1, 0)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000582 self.assertEqual(parser.buffer_size, 1024)
583 self.assertEqual(self.n, 1)
Christian Heimes2380ac72008-01-09 00:17:24 +0000584
585 # Turn off buffering and parse the next chunk.
586 parser.buffer_text = 0
587 self.assertFalse(parser.buffer_text)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000588 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000589 for i in range(10):
590 parser.Parse(xml2, 0)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000591 self.assertEqual(self.n, 11)
Christian Heimes2380ac72008-01-09 00:17:24 +0000592
593 parser.buffer_text = 1
594 self.assertTrue(parser.buffer_text)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000595 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000596 parser.Parse(xml3, 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000597 self.assertEqual(self.n, 12)
Christian Heimes2380ac72008-01-09 00:17:24 +0000598
Christian Heimes2380ac72008-01-09 00:17:24 +0000599 def counting_handler(self, text):
600 self.n += 1
601
602 def small_buffer_test(self, buffer_len):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200603 xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000604 parser = expat.ParserCreate()
605 parser.CharacterDataHandler = self.counting_handler
606 parser.buffer_size = 1024
607 parser.buffer_text = 1
608
609 self.n = 0
610 parser.Parse(xml)
611 return self.n
612
613 def test_change_size_1(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200614 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
615 xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000616 parser = expat.ParserCreate()
617 parser.CharacterDataHandler = self.counting_handler
618 parser.buffer_text = 1
619 parser.buffer_size = 1024
Ezio Melottib3aedd42010-11-20 19:04:17 +0000620 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000621
622 self.n = 0
623 parser.Parse(xml1, 0)
624 parser.buffer_size *= 2
Ezio Melottib3aedd42010-11-20 19:04:17 +0000625 self.assertEqual(parser.buffer_size, 2048)
Christian Heimes2380ac72008-01-09 00:17:24 +0000626 parser.Parse(xml2, 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000627 self.assertEqual(self.n, 2)
Christian Heimes2380ac72008-01-09 00:17:24 +0000628
629 def test_change_size_2(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200630 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
631 xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
Christian Heimes2380ac72008-01-09 00:17:24 +0000632 parser = expat.ParserCreate()
633 parser.CharacterDataHandler = self.counting_handler
634 parser.buffer_text = 1
635 parser.buffer_size = 2048
Ezio Melottib3aedd42010-11-20 19:04:17 +0000636 self.assertEqual(parser.buffer_size, 2048)
Christian Heimes2380ac72008-01-09 00:17:24 +0000637
638 self.n=0
639 parser.Parse(xml1, 0)
640 parser.buffer_size = parser.buffer_size // 2
Ezio Melottib3aedd42010-11-20 19:04:17 +0000641 self.assertEqual(parser.buffer_size, 1024)
Christian Heimes2380ac72008-01-09 00:17:24 +0000642 parser.Parse(xml2, 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000643 self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000644
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000645class MalformedInputTest(unittest.TestCase):
Brett Cannon2f827382009-08-13 19:58:01 +0000646 def test1(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200647 xml = b"\0\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000648 parser = expat.ParserCreate()
649 try:
650 parser.Parse(xml, True)
651 self.fail()
652 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000653 self.assertEqual(str(e), 'unclosed token: line 2, column 0')
Brett Cannon2f827382009-08-13 19:58:01 +0000654
655 def test2(self):
Serhiy Storchaka1273dfc2013-02-08 11:22:05 +0200656 # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200657 xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000658 parser = expat.ParserCreate()
Martin Panter076ca6c2016-07-14 01:31:46 +0000659 err_pattern = r'XML declaration not well-formed: line 1, column \d+'
660 with self.assertRaisesRegex(expat.ExpatError, err_pattern):
Brett Cannon2f827382009-08-13 19:58:01 +0000661 parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000662
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000663class ErrorMessageTest(unittest.TestCase):
664 def test_codes(self):
665 # verify mapping of errors.codes and errors.messages
666 self.assertEqual(errors.XML_ERROR_SYNTAX,
667 errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])
668
669 def test_expaterror(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200670 xml = b'<'
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000671 parser = expat.ParserCreate()
672 try:
673 parser.Parse(xml, True)
674 self.fail()
675 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000676 self.assertEqual(e.code,
677 errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000678
679
Antoine Pitrou452196f2011-01-05 18:44:14 +0000680class ForeignDTDTests(unittest.TestCase):
681 """
682 Tests for the UseForeignDTD method of expat parser objects.
683 """
684 def test_use_foreign_dtd(self):
685 """
686 If UseForeignDTD is passed True and a document without an external
687 entity reference is parsed, ExternalEntityRefHandler is first called
688 with None for the public and system ids.
689 """
690 handler_call_args = []
691 def resolve_entity(context, base, system_id, public_id):
692 handler_call_args.append((public_id, system_id))
693 return 1
694
695 parser = expat.ParserCreate()
696 parser.UseForeignDTD(True)
697 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
698 parser.ExternalEntityRefHandler = resolve_entity
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200699 parser.Parse(b"<?xml version='1.0'?><element/>")
Antoine Pitrou452196f2011-01-05 18:44:14 +0000700 self.assertEqual(handler_call_args, [(None, None)])
701
Christian Heimese26d3af2012-09-24 13:17:08 +0200702 # test UseForeignDTD() is equal to UseForeignDTD(True)
703 handler_call_args[:] = []
704
705 parser = expat.ParserCreate()
706 parser.UseForeignDTD()
707 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
708 parser.ExternalEntityRefHandler = resolve_entity
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200709 parser.Parse(b"<?xml version='1.0'?><element/>")
Christian Heimese26d3af2012-09-24 13:17:08 +0200710 self.assertEqual(handler_call_args, [(None, None)])
711
Antoine Pitrou452196f2011-01-05 18:44:14 +0000712 def test_ignore_use_foreign_dtd(self):
713 """
714 If UseForeignDTD is passed True and a document with an external
715 entity reference is parsed, ExternalEntityRefHandler is called with
716 the public and system ids from the document.
717 """
718 handler_call_args = []
719 def resolve_entity(context, base, system_id, public_id):
720 handler_call_args.append((public_id, system_id))
721 return 1
722
723 parser = expat.ParserCreate()
724 parser.UseForeignDTD(True)
725 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
726 parser.ExternalEntityRefHandler = resolve_entity
727 parser.Parse(
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200728 b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
Antoine Pitrou452196f2011-01-05 18:44:14 +0000729 self.assertEqual(handler_call_args, [("bar", "baz")])
730
731
Guido van Rossumd8faa362007-04-27 19:54:29 +0000732if __name__ == "__main__":
Zachary Ware38c707e2015-04-13 15:00:43 -0500733 unittest.main()