# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.

from io import BytesIO
import os
import sysconfig
import unittest
import traceback

from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict, run_unittest


class SetAttributeTest(unittest.TestCase):
    """Check the values read back from flag-like parser attributes."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is [value assigned, value expected when read back].
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_ordered_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.ordered_attributes = assigned
            self.assertEqual(self.parser.ordered_attributes, expected)

    def test_specified_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.specified_attributes = assigned
            self.assertEqual(self.parser.specified_attributes, expected)


# Sample document shared by the parsing tests.  It is a bytes literal whose
# XML declaration names iso-8859-1, so the raw \xb5 byte near the end is a
# single character in that encoding.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''


class ParseTest(unittest.TestCase):
    """Parse the sample document with every handler installed.

    Each test feeds the module-level ``data`` document to a fresh parser
    (as bytes, as str, or through a file object) and compares the recorded
    callback trace with the fixed list in ``_verify_parse_output``.
    """

    class Outputter:
        """Set of handlers that record each callback in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking raises if the argument count is not four.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking raises if the argument count is not five.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            context, base, sysId, pubId = args
            # The context argument is left out of the recorded trace.
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
    ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Compare a recorded callback trace with the expected one, pairwise."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # NOTE: zip() stops at the shorter sequence, so only the common
        # prefix of the two lists is compared.
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, 1)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), 1)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                         expat.errors.XML_ERROR_FINISHED)

class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The argument may be reported by position ("argument 2") or by
        # name ("argument 'namespace_separator'"); accept either wording so
        # the test is not tied to one interpreter release.
        with self.assertRaisesRegex(
                TypeError,
                r"ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short


class InterningTest(unittest.TestCase):
    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        names = []
        def collector(name, *args):
            names.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", 1)
        tag = names[0]
        self.assertEqual(len(names), 6)
        for entry in names:
            # names should hold the very same string object over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)


class BufferTextTest(unittest.TestCase):
    """Check how character data is delivered when buffer_text is toggled.

    Every handler appends what it receives to ``self.stuff``; the tests then
    compare that list with the expected sequence of chunks.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        # list() is required: formatting a bare map object with %r would
        # only show "<map object at ...>" in the failure message.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on the element switches buffering.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        # Install the named methods of this test case as parser callbacks.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        # entry is indexed like a (filename, lineno, name, line) tuple.
        self.assertEqual(os.path.basename(entry[0]), filename)
        self.assertEqual(entry[2], funcname)

    def test_exception(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # Derive the expected file name from this module instead of
        # hard-coding it, so the check survives the file being renamed.
        this_file = os.path.basename(__file__)
        # A plain try/except is used on purpose: the assertRaises() context
        # manager does not keep the traceback that is inspected below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", 1)
        except RuntimeError as e:
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but" + \
                             " found %r" % e.args[0])
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            self.check_traceback_entry(entries[0],
                                       this_file, "test_exception")
            self.check_traceback_entry(entries[1],
                                       "pyexpat.c", "StartElement")
            self.check_traceback_entry(entries[2],
                                       this_file, "StartElementHandler")
            # Only look at the source text when running from a source
            # build; elsewhere pyexpat.c may not be available to read.
            if sysconfig.is_python_build():
                self.assertIn('call_with_frame("StartElement"', entries[1][3])
        else:
            self.fail()


# Test Current* members:
class PositionTest(unittest.TestCase):
    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        # Compare the parser's current position with the next expected
        # (event, byte index, line number, column number) tuple.
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <c/> is indented by two spaces: the expected byte index 11 and
        # column 2 above depend on it.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Every expected event must have been seen, not just a prefix.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')


class sf1296433Test(unittest.TestCase):
    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            # Raising from the character data callback is the scenario
            # under test.
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        self.assertRaises(Exception, parser.Parse, xml.encode('iso8859'))

class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Non-positive sizes must be rejected.
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a'*512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        # Count callbacks; the delivered text itself is ignored.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Return how many times the character data handler fires for
        # buffer_len characters of text with a 1024-byte buffer.
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)

Georg Brandl91d2a3f2010-10-15 15:25:23 +0000620class MalformedInputTest(unittest.TestCase):
Brett Cannon2f827382009-08-13 19:58:01 +0000621 def test1(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200622 xml = b"\0\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000623 parser = expat.ParserCreate()
624 try:
625 parser.Parse(xml, True)
626 self.fail()
627 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000628 self.assertEqual(str(e), 'unclosed token: line 2, column 0')
Brett Cannon2f827382009-08-13 19:58:01 +0000629
630 def test2(self):
Serhiy Storchaka1273dfc2013-02-08 11:22:05 +0200631 # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200632 xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000633 parser = expat.ParserCreate()
634 try:
635 parser.Parse(xml, True)
636 self.fail()
637 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000638 self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000639
class ErrorMessageTest(unittest.TestCase):
    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = b'<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        # The numeric code on the exception must map to the
        # "unclosed token" message.
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])


class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        # UseForeignDTD() with no argument must behave like
        # UseForeignDTD(True), so run the same scenario for both spellings.
        for use_foreign_dtd_args in ((True,), ()):
            handler_call_args[:] = []

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*use_foreign_dtd_args)
            parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = resolve_entity
            parser.Parse(b"<?xml version='1.0'?><element/>")
            self.assertEqual(handler_call_args, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(handler_call_args, [("bar", "baz")])


def test_main():
    # Run every test case class defined in this module.
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest,
                 ForeignDTDTests)

if __name__ == "__main__":
    test_main()