blob: 95a614bb9b79a94bd4572118093d2c650c54c1da [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
from io import BytesIO
import unittest

from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000011
12
class SetAttributeTest(unittest.TestCase):
    """Assign values to parser flag attributes and check what reads back."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is [value assigned, value expected when read back].
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
            ]

    def test_ordered_attributes(self):
        for x, y in self.set_get_pairs:
            self.parser.ordered_attributes = x
            self.assertEqual(self.parser.ordered_attributes, y,
                             "after assigning %r" % (x,))

    def test_specified_attributes(self):
        for x, y in self.set_get_pairs:
            self.parser.specified_attributes = x
            self.assertEqual(self.parser.specified_attributes, y,
                             "after assigning %r" % (x,))
Fred Drake8f42e2b2001-04-25 16:03:54 +000032
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000033
# Sample document shared by the parsing tests below.  It is kept as bytes
# in ISO-8859-1 (see the XML declaration); the \xb5 escape is a single
# non-ASCII byte in that encoding.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000058
Guido van Rossumd8faa362007-04-27 19:54:29 +000059
# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the module-level sample document and check the events reported."""

    class Outputter:
        """Handler object that appends one entry to ``self.out`` per callback."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking is kept purely as a check on the argument count.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking is kept purely as a check on the argument count.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking is kept purely as a check on the argument count.
            context, base, sysId, pubId = args
            # The context argument is left out of the recorded entry.
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """
        Check that operations, the entries recorded by an Outputter, match
        the entries expected for the sample document, in order and in number.
        """
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # Compare entry by entry first so that a failure names the first
        # entry that differs.
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)
        # zip() stops at the shorter sequence, so missing or surplus entries
        # have to be detected separately.
        self.assertEqual(len(operations), len(expected_operations),
                         "unexpected number of parser events")

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, 1)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), 1)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        stream = BytesIO(data)

        parser.ParseFile(stream)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        stream = BytesIO(data)
        parser.ParseFile(stream)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(stream)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                         expat.errors.XML_ERROR_FINISHED)
250
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The argument is identified by position in some CPython versions
        # and by name in others; accept either wording, anchored at both
        # ends so the rest of the message is still checked exactly.
        with self.assertRaisesRegex(
                TypeError,
                r"^ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int$"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000284
Fred Drake2a3d7db2002-06-28 22:56:48 +0000285
class InterningTest(unittest.TestCase):
    """Tests around the parser's string interning."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []
        def collector(name, *args):
            L.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", 1)
        # Check the count first so that an empty list is reported as a test
        # failure rather than as an IndexError on L[0].
        self.assertEqual(len(L), 6)
        tag = L[0]
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000320
Guido van Rossumd8faa362007-04-27 19:54:29 +0000321
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text attribute groups character data callbacks."""

    def setUp(self):
        # Every handler below appends to self.stuff, giving one ordered log.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded log equals expected; label prefixes the message."""
        # map() returns a lazy iterator on Python 3, whose repr is useless
        # in a failure message, so it is materialised into a list.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A buffer-text attribute on the element switches buffering on or
        # off from inside the callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the same-named methods of this test case on the parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000415
Fred Draked7ea55b2004-08-13 03:09:07 +0000416
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a handler escapes from Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as cm:
            parser.Parse(b"<a><b><c/></b></a>", 1)
        # The handler raises with the element name, so the first element
        # of the document is the one expected here.
        self.assertEqual(cm.exception.args[0], 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % cm.exception.args[0])
Fred Draked7ea55b2004-08-13 03:09:07 +0000432
Dave Cole3203efb2004-08-26 00:37:31 +0000433
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside element handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected entry."""
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The message names the expected value first and the observed one
        # second.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # Entries are (event, byte index, line number, column number).
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # check_pos() only rejects surplus events; make sure none are missing.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000464
465
class sf1296433Test(unittest.TestCase):
    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Require the handler's own exception type: a bare Exception would
        # also be satisfied by any unrelated failure.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000484
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        def f(size):
            parser.buffer_size = size

        self.assertRaises(ValueError, f, -1)
        self.assertRaises(ValueError, f, 0)

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a'*512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        """Character data handler that only counts how often it is called."""
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """
        Parse a document holding buffer_len bytes of character data with a
        1024-byte text buffer and return the number of handler calls.
        """
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000603
class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for input that is not well-formed."""

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # The column reported for this error is not the same in every
        # Expat release, so only its presence is checked.
        err_pattern = r'^XML declaration not well-formed: line 1, column \d+$'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000623
class ErrorMessageTest(unittest.TestCase):
    """Check the error code and message tables of xml.parsers.expat.errors."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = b'<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000639
640
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def _resolved_ids(self, document, *use_foreign_dtd_args):
        """
        Parse document with a fresh parser on which
        UseForeignDTD(*use_foreign_dtd_args) has been called, and return the
        list of (public_id, system_id) pairs that ExternalEntityRefHandler
        was called with.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*use_foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return handler_call_args

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._resolved_ids(document, True), [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._resolved_ids(document), [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._resolved_ids(document, True), [("bar", "baz")])
691
692
def test_main():
    """Run every test case class of this module through run_unittest()."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest,
                 ForeignDTDTests)

if __name__ == "__main__":
    test_main()