# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
from io import BytesIO
import unittest

from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000011
12
class SetAttributeTest(unittest.TestCase):
    """Check how flag-like parser attributes read back after assignment."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is [value assigned, value expected when read back].
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def _check_round_trip(self, attribute):
        # Assign every "set" value to the named parser attribute in order
        # and compare what the parser reports with the paired "get" value.
        for assigned, reported in self.set_get_pairs:
            setattr(self.parser, attribute, assigned)
            self.assertEqual(getattr(self.parser, attribute), reported)

    def test_ordered_attributes(self):
        self._check_round_trip('ordered_attributes')

    def test_specified_attributes(self):
        self._check_round_trip('specified_attributes')
Fred Drake8f42e2b2001-04-25 16:03:54 +000032
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000033
# Sample document (declared as iso-8859-1) parsed by the tests in this file.
# It contains an XML declaration, a processing instruction, a comment, an
# internal DTD subset, a namespaced element, a CDATA section, two entity
# references and one non-ASCII byte (\xb5).
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000058
Guido van Rossumd8faa362007-04-27 19:54:29 +000059
# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check the callbacks."""

    class Outputter:
        """Set of expat handlers that log every callback into ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are dropped so the log does not depend
            # on how the text between elements is laid out.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking raises ValueError unless exactly four arguments
            # were passed; the names themselves are not used.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # As above: the unpacking only checks the argument count (five).
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # Argument-count check (four); the context argument is left out
            # of the logged tuple.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            # Installed on the parser but deliberately records nothing.
            pass

        def DefaultHandlerExpand(self, userData):
            # Installed on the parser but deliberately records nothing.
            pass

    # Parser attributes that _hookup_callbacks() copies from a handler object.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _make_parser(self):
        """
        Return (parser, outputter): a parser created with '!' as the
        namespace separator and a fresh Outputter hooked up to it.
        """
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        return parser, out

    def _verify_parse_output(self, operations):
        """
        Check that operations, the log recorded by an Outputter, is exactly
        the list of callbacks expected for the module-level data document.
        """
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # Compare entry by entry first so that a failure names the first
        # callback that differs.
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)
        # zip() stops at the shorter sequence, so a truncated (or empty) log
        # would otherwise be accepted; the lengths have to agree as well.
        self.assertEqual(len(operations), len(expected_operations))

    def test_parse_bytes(self):
        parser, out = self._make_parser()

        parser.Parse(data, 1)

        self._verify_parse_output(out.out)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        parser, out = self._make_parser()

        parser.Parse(data.decode('iso-8859-1'), 1)

        self._verify_parse_output(out.out)

    def test_parse_file(self):
        # Try parsing a file
        parser, out = self._make_parser()
        file = BytesIO(data)

        parser.ParseFile(file)

        self._verify_parse_output(out.out)
Fred Drake1e0611b2000-12-23 22:12:07 +0000238
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The way the offending argument is identified (by position or by
        # name) is not the same in every CPython version, so both spellings
        # are accepted; the rest of the message is still checked.
        with self.assertRaisesRegex(
                TypeError,
                r"ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='')  # empty, but must be accepted
Fred Drake2a3d7db2002-06-28 22:56:48 +0000272
Fred Drake2a3d7db2002-06-28 22:56:48 +0000273
class InterningTest(unittest.TestCase):
    """Check that element names handed to handlers are shared objects."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", 1)
        tag = L[0]
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty document with a sub-parser derived from the
                # main parser and remember what Parse() returned.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000308
Guido van Rossumd8faa362007-04-27 19:54:29 +0000309
class BufferTextTest(unittest.TestCase):
    """Check how buffer_text affects the chunks of character data reported."""

    def setUp(self):
        # self.stuff collects everything the handlers below are called with.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        # map() returns a lazy iterator whose repr is useless in a failure
        # message, so it is turned into a list before being formatted.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on the element switches buffering on or
        # off while the document is being parsed.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        # Install the methods of this test case named in handlers as the
        # parser callbacks of the same name.  The default is an immutable
        # empty tuple rather than a shared mutable list.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000403
Fred Draked7ea55b2004-08-13 03:09:07 +0000404
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a handler reaches the caller."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as cm:
            parser.Parse(b"<a><b><c/></b></a>", 1)
        # The handler raises with the element name as its argument, so the
        # exception seen here must come from the first start tag, 'a'.
        raised_for = cm.exception.args[0]
        self.assertEqual(raised_for, 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % raised_for)
Fred Draked7ea55b2004-08-13 03:09:07 +0000420
Dave Cole3203efb2004-08-26 00:37:31 +0000421
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        # Compare the parser position at this callback with the next entry
        # of self.expected_list, then advance to the following entry.
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # (event, byte index, line number, column number) per callback.
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <c/> is indented by two spaces: that is what puts it at byte 11,
        # column 2, as listed above.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # check_pos() only rejects surplus events; make sure none is missing.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000452
453
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the exception raised by the handler; accepting any
        # Exception would let an unrelated failure pass unnoticed.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000472
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def _counting_parser(self):
        # New parser whose character data handler only counts its calls
        # (in self.n).  Buffering is configured by each test because the
        # order of the buffer_text/buffer_size assignments differs per test.
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        return parser

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a'*512 + b'</s>'
        parser = self._counting_parser()
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        # Turn buffering back on and parse the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Parse buffer_len bytes of character data through a 1024-byte text
        # buffer and return how many times the handler was called.
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
        parser = self._counting_parser()
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        # Double the buffer between the two chunks.
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        # Halve the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000591
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000592class MalformedInputTest(unittest.TestCase):
Brett Cannon2f827382009-08-13 19:58:01 +0000593 def test1(self):
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200594 xml = b"\0\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000595 parser = expat.ParserCreate()
596 try:
597 parser.Parse(xml, True)
598 self.fail()
599 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000600 self.assertEqual(str(e), 'unclosed token: line 2, column 0')
Brett Cannon2f827382009-08-13 19:58:01 +0000601
602 def test2(self):
Serhiy Storchaka1273dfc2013-02-08 11:22:05 +0200603 # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200604 xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
Brett Cannon2f827382009-08-13 19:58:01 +0000605 parser = expat.ParserCreate()
606 try:
607 parser.Parse(xml, True)
608 self.fail()
609 except expat.ExpatError as e:
Ezio Melottib3aedd42010-11-20 19:04:17 +0000610 self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000611
class ErrorMessageTest(unittest.TestCase):
    """Check the errors.codes/errors.messages tables and ExpatError.code."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = b'<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        # The numeric code on the exception must be the one registered for
        # the "unclosed token" message.
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000627
628
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def _external_entity_refs(self, document, *use_foreign_dtd_args):
        """
        Parse document with a new parser on which UseForeignDTD has been
        called with use_foreign_dtd_args and parameter entity parsing is
        always enabled.  Return the (public_id, system_id) pairs that
        ExternalEntityRefHandler was called with, in call order.
        """
        handler_call_args = []

        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*use_foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return handler_call_args

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._external_entity_refs(document, True),
                         [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._external_entity_refs(document),
                         [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._external_entity_refs(document, True),
                         [("bar", "baz")])
679
680
def test_main():
    """Run every test case class defined in this module."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest,
                 ForeignDTDTests)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000694
# Run the whole suite when the file is executed as a script.
if __name__ == "__main__":
    test_main()