blob: c233bc11e3157ff638009e5ef3a2e79db680ffde [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02005import os
Antoine Pitrou2b3b95b2014-11-29 15:56:07 +01006import sysconfig
Guido van Rossumd8faa362007-04-27 19:54:29 +00007import unittest
Antoine Pitrou0ddbf472014-10-08 20:00:09 +02008import traceback
Guido van Rossumd8faa362007-04-27 19:54:29 +00009
Fred Drake7fbc85c2000-09-23 04:47:56 +000010from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +000011from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +000012
Benjamin Petersonee8712c2008-05-20 21:35:26 +000013from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000014
15
class SetAttributeTest(unittest.TestCase):
    """Assigning to the flag-like parser attributes and reading them back."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # [value assigned, value expected on read-back]; the third pair
        # shows that a truthy value other than 1 is expected to read back
        # as 1.
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_ordered_attributes(self):
        for x, y in self.set_get_pairs:
            # subTest() reports which assigned value failed.
            with self.subTest(assigned=x):
                self.parser.ordered_attributes = x
                self.assertEqual(self.parser.ordered_attributes, y)

    def test_specified_attributes(self):
        for x, y in self.set_get_pairs:
            with self.subTest(assigned=x):
                self.parser.specified_attributes = x
                self.assertEqual(self.parser.specified_attributes, y)
Fred Drake8f42e2b2001-04-25 16:03:54 +000035
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000036
# Sample document shared by the parsing tests in this module.  It is a
# bytes literal: the document declares iso-8859-1 and carries the raw byte
# 0xB5 just before the closing root tag.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000061
Guido van Rossumd8faa362007-04-27 19:54:29 +000062
# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check the callbacks."""

    class Outputter:
        """Set of parser callbacks that log each call to ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails unless exactly four arguments arrive.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking fails unless exactly five arguments arrive.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking fails unless exactly four arguments arrive; the
            # context (first argument) is left out of the log entry.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the parser attributes that _hookup_callbacks() installs.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
    ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Check *operations* against the log expected for ``data``."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # zip() stops at the shorter sequence, so check the lengths first;
        # otherwise a truncated (or even empty) log would pass unnoticed.
        self.assertEqual(len(operations), len(expected_operations),
                         operations)
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, True)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), True)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                         expat.errors.XML_ERROR_FINISHED)
253
class NamespaceSeparatorTest(unittest.TestCase):
    """Validation of ParserCreate()'s ``namespace_separator`` argument."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # NOTE(review): the TypeError text identifies the offending
        # argument either by position ("2") or by keyword name; both
        # spellings are accepted so the test does not hinge on that detail.
        with self.assertRaisesRegex(
                TypeError,
                r"ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='')  # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000287
Fred Drake2a3d7db2002-06-28 22:56:48 +0000288
class InterningTest(unittest.TestCase):
    """Tests around the parser's string interning machinery."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", True)
        tag = L[0]
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string repeated over and over.
            # assertIs() reports both objects when the identity check fails.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty document with a child parser and keep the
                # result so the test can inspect it afterwards.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", True)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, True)
        self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000323
Guido van Rossumd8faa362007-04-27 19:54:29 +0000324
class BufferTextTest(unittest.TestCase):
    """Behaviour of the ``buffer_text`` attribute of parser objects.

    Every callback appends a marker to ``self.stuff``; each test compares
    that list with the sequence it expects.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is placed at the start of the failure message.
        """
        # Build a real list: a lazy map object formatted with %r would only
        # show "<map object at ...>" in the failure message.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, [str(item) for item in expected]))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on an element toggles buffering from
        # inside the callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the methods of this test case named in *handlers*."""
        # An immutable default avoids the shared-mutable-default pitfall.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000418
Fred Draked7ea55b2004-08-13 03:09:07 +0000419
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """An exception raised inside a callback must propagate out of Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        """Check the file base name and function name of a traceback entry."""
        self.assertEqual(os.path.basename(entry[0]), filename)
        self.assertEqual(entry[2], funcname)

    def test_exception(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # Take the expected file name from the handler's own code object
        # instead of hard-coding it, so the check still holds when this
        # module is loaded under a different file name.
        this_file = os.path.basename(
            self.StartElementHandler.__code__.co_filename)
        # A plain try/except is used on purpose: the traceback is inspected
        # below, and the exception exposed by an assertRaises() context
        # manager no longer carries it.
        try:
            parser.Parse(b"<a><b><c/></b></a>", True)
            self.fail()
        except RuntimeError as e:
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but" + \
                             " found %r" % e.args[0])
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            self.check_traceback_entry(entries[0],
                                       this_file, "test_exception")
            self.check_traceback_entry(entries[1],
                                       "pyexpat.c", "StartElement")
            self.check_traceback_entry(entries[2],
                                       this_file, "StartElementHandler")
            # The C source line is only checked when running from a build
            # tree.
            if sysconfig.is_python_build():
                self.assertIn('call_with_frame("StartElement"', entries[1][3])
Fred Draked7ea55b2004-08-13 03:09:07 +0000450
Dave Cole3203efb2004-08-26 00:37:31 +0000451
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the position attributes reported while callbacks run."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start-element callback and 'e' for an
        end-element callback.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # Argument order matters: the expected value first, then the actual.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # (event, byte index, line number, column number) per callback.
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <b> is indented by one space and <c/> by two; the expected byte
        # and column offsets above depend on this exact layout.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)
        # Without this check the test would pass if no callback ever fired.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000482
483
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the exception raised by the handler: accepting any
        # Exception would also let an unrelated failure satisfy the test.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000502
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Negative and zero sizes must both be rejected.
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a' * 512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, False)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(xml2, False)
        self.assertEqual(self.n, 11)

        # Turn buffering back on for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, True)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        # Character data callback that only counts its invocations.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* characters of text with a 1024 byte buffer.

        Returns the number of character data callbacks that were made.
        """
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, False)
        # Double the buffer between the two chunks.
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, True)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, False)
        # Halve the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, True)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000621
class MalformedInputTest(unittest.TestCase):
    """Error reporting for input that is not well-formed."""

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # NOTE(review): only the error text and the line are pinned; the
        # reported column is matched loosely because it is an Expat
        # implementation detail rather than something this test targets.
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000641
class ErrorMessageTest(unittest.TestCase):
    """Consistency of the ``errors`` module's code and message tables."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = b'<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        # The exception's numeric code must map to the "unclosed token"
        # message constant.
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000657
658
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def _parse_recording_entity_refs(self, document, *foreign_dtd_args):
        # Parse *document* with a fresh parser on which UseForeignDTD() was
        # called with *foreign_dtd_args*, and return the (public_id,
        # system_id) pairs passed to ExternalEntityRefHandler, in call order.
        handler_call_args = []

        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return handler_call_args

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(
            self._parse_recording_entity_refs(document, True),
            [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(
            self._parse_recording_entity_refs(document),
            [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(
            self._parse_recording_entity_refs(document, True),
            [("bar", "baz")])
709
710
def test_main():
    """Run every test case class of this module through run_unittest()."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest,
                 ForeignDTDTests)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000724
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()