blob: 550aebf06658beac5d738b517146e50836d2f984 [file] [log] [blame]
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +00001# XXX TypeErrors on calling handlers, or on bad return values from a
2# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02005import os
Serhiy Storchakade5f9f42015-09-07 22:51:56 +03006import sys
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +01007import sysconfig
Guido van Rossumd8faa362007-04-27 19:54:29 +00008import unittest
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02009import traceback
Guido van Rossumd8faa362007-04-27 19:54:29 +000010
Fred Drake7fbc85c2000-09-23 04:47:56 +000011from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +000012from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +000013
Zachary Ware38c707e2015-04-13 15:00:43 -050014from test.support import sortdict
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000015
16
class SetAttributeTest(unittest.TestCase):
    """Check reading and assigning the boolean attributes of an xmlparser."""

    def setUp(self):
        # Each test works on its own freshly created parser.
        self.parser = expat.ParserCreate(namespace_separator='!')

    def test_buffer_text(self):
        # The attribute starts out False and any assigned value is
        # reported back as the corresponding bool singleton.
        self.assertIs(self.parser.buffer_text, False)
        for x in 0, 1, 2, 0:
            self.parser.buffer_text = x
            self.assertIs(self.parser.buffer_text, bool(x))

    def test_namespace_prefixes(self):
        self.assertIs(self.parser.namespace_prefixes, False)
        for x in 0, 1, 2, 0:
            self.parser.namespace_prefixes = x
            self.assertIs(self.parser.namespace_prefixes, bool(x))

    def test_ordered_attributes(self):
        self.assertIs(self.parser.ordered_attributes, False)
        for x in 0, 1, 2, 0:
            self.parser.ordered_attributes = x
            self.assertIs(self.parser.ordered_attributes, bool(x))

    # NOTE: this test used to be defined twice with identical bodies; the
    # second definition silently replaced the first, so only one is kept.
    def test_specified_attributes(self):
        self.assertIs(self.parser.specified_attributes, False)
        for x in 0, 1, 2, 0:
            self.parser.specified_attributes = x
            self.assertIs(self.parser.specified_attributes, bool(x))

    def test_invalid_attributes(self):
        # returns_unicode is not a parser attribute: both assigning to it
        # and reading it must raise AttributeError.
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode

        # Issue #25019: a non-string attribute name must raise TypeError
        # through every access path.
        self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
        self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
        self.assertRaises(TypeError, getattr, self.parser, range(0xF))
Fred Drake8f42e2b2001-04-25 16:03:54 +000061
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000062
# Sample XML document shared by the parsing tests in this file.  It is a
# bytes literal whose XML declaration names iso-8859-1 (note the single
# \xb5 byte near the end) and it exercises a DTD with element, attribute
# list, notation and entity declarations, a namespaced element, a CDATA
# section, and external/skipped entity references.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
 Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000087
Guido van Rossumd8faa362007-04-27 19:54:29 +000088
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000089# Produce UTF-8 output
Guido van Rossumd8faa362007-04-27 19:54:29 +000090class ParseTest(unittest.TestCase):
91 class Outputter:
92 def __init__(self):
93 self.out = []
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000094
Guido van Rossumd8faa362007-04-27 19:54:29 +000095 def StartElementHandler(self, name, attrs):
96 self.out.append('Start element: ' + repr(name) + ' ' +
97 sortdict(attrs))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +000098
Guido van Rossumd8faa362007-04-27 19:54:29 +000099 def EndElementHandler(self, name):
100 self.out.append('End element: ' + repr(name))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000101
Guido van Rossumd8faa362007-04-27 19:54:29 +0000102 def CharacterDataHandler(self, data):
103 data = data.strip()
104 if data:
105 self.out.append('Character data: ' + repr(data))
Andrew M. Kuchling7fd7e362000-06-27 00:37:25 +0000106
Guido van Rossumd8faa362007-04-27 19:54:29 +0000107 def ProcessingInstructionHandler(self, target, data):
108 self.out.append('PI: ' + repr(target) + ' ' + repr(data))
109
110 def StartNamespaceDeclHandler(self, prefix, uri):
111 self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))
112
113 def EndNamespaceDeclHandler(self, prefix):
114 self.out.append('End of NS decl: ' + repr(prefix))
115
116 def StartCdataSectionHandler(self):
117 self.out.append('Start of CDATA section')
118
119 def EndCdataSectionHandler(self):
120 self.out.append('End of CDATA section')
121
122 def CommentHandler(self, text):
123 self.out.append('Comment: ' + repr(text))
124
125 def NotationDeclHandler(self, *args):
126 name, base, sysid, pubid = args
127 self.out.append('Notation declared: %s' %(args,))
128
129 def UnparsedEntityDeclHandler(self, *args):
130 entityName, base, systemId, publicId, notationName = args
131 self.out.append('Unparsed entity decl: %s' %(args,))
132
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000133 def NotStandaloneHandler(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000134 self.out.append('Not standalone')
135 return 1
136
137 def ExternalEntityRefHandler(self, *args):
138 context, base, sysId, pubId = args
139 self.out.append('External entity ref: %s' %(args[1:],))
140 return 1
141
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000142 def StartDoctypeDeclHandler(self, *args):
143 self.out.append(('Start doctype', args))
144 return 1
145
146 def EndDoctypeDeclHandler(self):
147 self.out.append("End doctype")
148 return 1
149
150 def EntityDeclHandler(self, *args):
151 self.out.append(('Entity declaration', args))
152 return 1
153
154 def XmlDeclHandler(self, *args):
155 self.out.append(('XML declaration', args))
156 return 1
157
158 def ElementDeclHandler(self, *args):
159 self.out.append(('Element declaration', args))
160 return 1
161
162 def AttlistDeclHandler(self, *args):
163 self.out.append(('Attribute list declaration', args))
164 return 1
165
166 def SkippedEntityHandler(self, *args):
167 self.out.append(("Skipped entity", args))
168 return 1
169
Guido van Rossumd8faa362007-04-27 19:54:29 +0000170 def DefaultHandler(self, userData):
171 pass
172
173 def DefaultHandlerExpand(self, userData):
174 pass
175
176 handler_names = [
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000177 'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
178 'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
179 'NotationDeclHandler', 'StartNamespaceDeclHandler',
180 'EndNamespaceDeclHandler', 'CommentHandler',
181 'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
182 'DefaultHandlerExpand', 'NotStandaloneHandler',
183 'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
184 'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
185 'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
Guido van Rossumd8faa362007-04-27 19:54:29 +0000186 ]
187
Antoine Pitrou452196f2011-01-05 18:44:14 +0000188 def _hookup_callbacks(self, parser, handler):
189 """
190 Set each of the callbacks defined on handler and named in
191 self.handler_names on the given parser.
192 """
193 for name in self.handler_names:
194 setattr(parser, name, getattr(handler, name))
195
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000196 def _verify_parse_output(self, operations):
197 expected_operations = [
198 ('XML declaration', ('1.0', 'iso-8859-1', 0)),
199 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
200 "Comment: ' comment data '",
201 "Not standalone",
202 ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
203 ('Element declaration', ('root', (2, 0, None, ()))),
204 ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
205 1)),
206 ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
207 0)),
208 "Notation declared: ('notation', None, 'notation.jpeg', None)",
209 ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
210 ('Entity declaration', ('external_entity', 0, None, None,
211 'entity.file', None, None)),
212 "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
213 "Not standalone",
214 "End doctype",
215 "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
216 "NS decl: 'myns' 'http://www.python.org/namespace'",
217 "Start element: 'http://www.python.org/namespace!subelement' {}",
218 "Character data: 'Contents of subelements'",
219 "End element: 'http://www.python.org/namespace!subelement'",
220 "End of NS decl: 'myns'",
221 "Start element: 'sub2' {}",
222 'Start of CDATA section',
223 "Character data: 'contents of CDATA section'",
224 'End of CDATA section',
225 "End element: 'sub2'",
226 "External entity ref: (None, 'entity.file', None)",
227 ('Skipped entity', ('skipped_entity', 0)),
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200228 "Character data: '\xb5'",
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000229 "End element: 'root'",
230 ]
231 for operation, expected_operation in zip(operations, expected_operations):
Ezio Melottib3aedd42010-11-20 19:04:17 +0000232 self.assertEqual(operation, expected_operation)
Guido van Rossum4ca94712007-07-23 17:42:32 +0000233
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200234 def test_parse_bytes(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000235 out = self.Outputter()
236 parser = expat.ParserCreate(namespace_separator='!')
Antoine Pitrou452196f2011-01-05 18:44:14 +0000237 self._hookup_callbacks(parser, out)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000238
239 parser.Parse(data, 1)
240
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000241 operations = out.out
242 self._verify_parse_output(operations)
Alexander Belopolskye239d232010-12-08 23:31:48 +0000243 # Issue #6697.
244 self.assertRaises(AttributeError, getattr, parser, '\uD800')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000245
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200246 def test_parse_str(self):
247 out = self.Outputter()
248 parser = expat.ParserCreate(namespace_separator='!')
249 self._hookup_callbacks(parser, out)
250
251 parser.Parse(data.decode('iso-8859-1'), 1)
252
253 operations = out.out
254 self._verify_parse_output(operations)
255
Guido van Rossumd8faa362007-04-27 19:54:29 +0000256 def test_parse_file(self):
257 # Try parsing a file
258 out = self.Outputter()
259 parser = expat.ParserCreate(namespace_separator='!')
Antoine Pitrou452196f2011-01-05 18:44:14 +0000260 self._hookup_callbacks(parser, out)
Guido van Rossum4ca94712007-07-23 17:42:32 +0000261 file = BytesIO(data)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000262
263 parser.ParseFile(file)
264
Amaury Forgeot d'Arcb4415542010-10-05 23:14:47 +0000265 operations = out.out
266 self._verify_parse_output(operations)
Fred Drake1e0611b2000-12-23 22:12:07 +0000267
Ned Deilye7d532f2014-03-27 16:39:58 -0700268 def test_parse_again(self):
269 parser = expat.ParserCreate()
270 file = BytesIO(data)
271 parser.ParseFile(file)
272 # Issue 6676: ensure a meaningful exception is raised when attempting
273 # to parse more than one XML document per xmlparser instance,
274 # a limitation of the Expat library.
275 with self.assertRaises(expat.error) as cm:
276 parser.ParseFile(file)
277 self.assertEqual(expat.ErrorString(cm.exception.code),
278 expat.errors.XML_ERROR_FINISHED)
279
class NamespaceSeparatorTest(unittest.TestCase):
    """Validate the namespace_separator argument of expat.ParserCreate()."""

    def test_legal(self):
        # Good values must be accepted without any error: the argument
        # omitted, None, and a one-character string.
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # Wrong type.  Depending on the interpreter version the offending
        # argument is reported by position ("argument 2") or by name
        # ("argument 'namespace_separator'"), so both spellings are
        # accepted instead of pinning a single exact message.
        with self.assertRaisesRegex(
                TypeError,
                r"^ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int$"):
            expat.ParserCreate(namespace_separator=42)

        # Right type, but more than one character.
        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='')  # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000313
Fred Drake2a3d7db2002-06-28 22:56:48 +0000314
Guido van Rossumd8faa362007-04-27 19:54:29 +0000315class InterningTest(unittest.TestCase):
316 def test(self):
317 # Test the interning machinery.
318 p = expat.ParserCreate()
319 L = []
320 def collector(name, *args):
321 L.append(name)
322 p.StartElementHandler = collector
323 p.EndElementHandler = collector
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200324 p.Parse(b"<e> <e/> <e></e> </e>", 1)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000325 tag = L[0]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000326 self.assertEqual(len(L), 6)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000327 for entry in L:
328 # L should have the same string repeated over and over.
329 self.assertTrue(tag is entry)
330
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000331 def test_issue9402(self):
332 # create an ExternalEntityParserCreate with buffer text
333 class ExternalOutputter:
334 def __init__(self, parser):
335 self.parser = parser
336 self.parser_result = None
337
338 def ExternalEntityRefHandler(self, context, base, sysId, pubId):
339 external_parser = self.parser.ExternalEntityParserCreate("")
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200340 self.parser_result = external_parser.Parse(b"", 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000341 return 1
342
343 parser = expat.ParserCreate(namespace_separator='!')
344 parser.buffer_text = 1
345 out = ExternalOutputter(parser)
346 parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
347 parser.Parse(data, 1)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000348 self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000349
Guido van Rossumd8faa362007-04-27 19:54:29 +0000350
class BufferTextTest(unittest.TestCase):
    """Exercise the buffer_text attribute and how it groups character data."""

    def setUp(self):
        # self.stuff collects everything the handlers below report.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the collected events equal *expected*.

        *label* is placed at the start of the failure message.
        """
        # Materialise the stringified items: a bare map() object would be
        # rendered as "<map object at 0x...>" in the message.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, [str(item) for item in expected]))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on the element switches buffering
        # while the document is being parsed.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the same-named methods of this test case on the parser.

        *handlers* is an iterable of handler attribute names; the default
        installs nothing.  (An immutable default avoids the shared mutable
        default-argument pitfall.)
        """
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # Entity and character references are collapsed into the same
        # buffered chunk as the surrounding text.
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        # With the character data handler removed only element events
        # are recorded.
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000444
Fred Draked7ea55b2004-08-13 03:09:07 +0000445
446# Test handling of exception from callback:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000447class HandlerExceptionTest(unittest.TestCase):
448 def StartElementHandler(self, name, attrs):
449 raise RuntimeError(name)
Fred Draked7ea55b2004-08-13 03:09:07 +0000450
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200451 def check_traceback_entry(self, entry, filename, funcname):
452 self.assertEqual(os.path.basename(entry[0]), filename)
453 self.assertEqual(entry[2], funcname)
454
455 def test_exception(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000456 parser = expat.ParserCreate()
457 parser.StartElementHandler = self.StartElementHandler
458 try:
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200459 parser.Parse(b"<a><b><c/></b></a>", 1)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000460 self.fail()
461 except RuntimeError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000462 self.assertEqual(e.args[0], 'a',
463 "Expected RuntimeError for element 'a', but" + \
464 " found %r" % e.args[0])
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200465 # Check that the traceback contains the relevant line in pyexpat.c
466 entries = traceback.extract_tb(e.__traceback__)
467 self.assertEqual(len(entries), 3)
468 self.check_traceback_entry(entries[0],
469 "test_pyexpat.py", "test_exception")
470 self.check_traceback_entry(entries[1],
471 "pyexpat.c", "StartElement")
472 self.check_traceback_entry(entries[2],
473 "test_pyexpat.py", "StartElementHandler")
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +0100474 if sysconfig.is_python_build():
475 self.assertIn('call_with_frame("StartElement"', entries[1][3])
Fred Draked7ea55b2004-08-13 03:09:07 +0000476
Dave Cole3203efb2004-08-26 00:37:31 +0000477
478# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the Current* position attributes seen from inside handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag and 'e' for an end tag.  Entries of
        self.expected_list are consumed in order, tracked by self.upto.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The message names the expected value first, then the actual one.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # Each entry is (event, byte index, line number, column number).
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # check_pos() only guards against surplus events; make sure none
        # of the expected ones went missing either.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000508
509
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        template = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>"
        payload = template % ('a' * 1025,)
        # For comparison, this document doesn't crash:
        #   "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            # Fail as soon as any character data is delivered.
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        with self.assertRaises(Exception):
            parser.Parse(payload.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000528
class ChardataBufferTest(unittest.TestCase):
    """
    Tests for assigning the size of the character data buffer.
    """

    def test_1025_bytes(self):
        calls = self.small_buffer_test(1025)
        self.assertEqual(calls, 2)

    def test_1000_bytes(self):
        calls = self.small_buffer_test(1000)
        self.assertEqual(calls, 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Each rejected value is paired with the exception(s) it may raise.
        rejected = [
            (-1, ValueError),
            (0, ValueError),
            (sys.maxsize + 1, (ValueError, OverflowError)),
            (512.0, TypeError),
        ]
        for bad_size, expected_error in rejected:
            with self.assertRaises(expected_error):
                parser.buffer_size = bad_size

    def test_unchanged_size(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        tail = b'a'*512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(head)
        self.assertEqual(self.n, 1)

        # Assigning buffer_size its current value must not trigger
        # another handler call.
        same_size = parser.buffer_size
        parser.buffer_size = same_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(tail)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        first = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        middle = b'b' * 1024
        last = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(first, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(middle, 0)
        self.assertEqual(self.n, 11)

        # Turn buffering back on for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(last, 1)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        # Character data handler that only counts its invocations.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Parse a document holding buffer_len characters with a 1024-byte
        # buffer and return how often the handler was called.
        document = (b"<?xml version='1.0' encoding='iso8859'?><s>"
                    + b'a' * buffer_len + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(document)
        return self.n

    def test_change_size_1(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        tail = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(head, 0)
        # Double the buffer between the two chunks.
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(tail, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        tail = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(head, 0)
        # Halve the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(tail, 1)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000650
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000651class MalformedInputTest(unittest.TestCase):
Brett Cannon2f827382009-08-13 19:58:01 +0000652 def test1(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200653 xml = b"\0\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000654 parser = expat.ParserCreate()
655 try:
656 parser.Parse(xml, True)
657 self.fail()
658 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000659 self.assertEqual(str(e), 'unclosed token: line 2, column 0')
Brett Cannon2f827382009-08-13 19:58:01 +0000660
661 def test2(self):
Serhiy Storchaka1273dfc2013-02-08 11:22:05 +0200662 # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200663 xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000664 parser = expat.ParserCreate()
665 try:
666 parser.Parse(xml, True)
667 self.fail()
668 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000669 self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000670
class ErrorMessageTest(unittest.TestCase):
    """Tests for the codes/messages tables of xml.parsers.expat.errors."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        """The code attribute of a raised ExpatError matches errors.codes."""
        xml = b'<'
        parser = expat.ParserCreate()
        # assertRaises reports "ExpatError not raised" if parsing succeeds,
        # instead of the message-less failure a bare self.fail() produces.
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000686
687
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled, parsing a document that has no external
        entity reference makes ExternalEntityRefHandler fire with None for
        both the public and the system id.
        """
        seen_ids = []

        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = b"<?xml version='1.0'?><element/>"
        # The second pass checks UseForeignDTD() is equal to
        # UseForeignDTD(True).
        for dtd_args in ((True,), ()):
            del seen_ids[:]

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*dtd_args)
            parser.SetParamEntityParsing(
                expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = record_ids
            parser.Parse(document)
            self.assertEqual(seen_ids, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled, parsing a document that does carry an
        external entity reference makes ExternalEntityRefHandler fire with
        the public and system ids taken from the document.
        """
        seen_ids = []

        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = record_ids
        parser.Parse(document)
        self.assertEqual(seen_ids, [("bar", "baz")])
738
739
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()