blob: 27eecb8d8387c5e93adf6844459a750540117c7b [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import unittest
6
Fred Drake7fbc85c2000-09-23 04:47:56 +00007from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +00008from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +00009
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000011
12
class SetAttributeTest(unittest.TestCase):
    """Check that the boolean parser flags normalise the values assigned."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is (value assigned, value expected when read back).
        # Any true value is expected to read back equal to 1; the trailing
        # [0, 0] verifies that the flag can be switched off again.
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_ordered_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.ordered_attributes = assigned
            self.assertEqual(self.parser.ordered_attributes, expected)

    def test_specified_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.specified_attributes = assigned
            self.assertEqual(self.parser.specified_attributes, expected)
Fred Drake8f42e2b2001-04-25 16:03:54 +000032
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000033
# Sample document shared by the parsing tests.  It contains an XML
# declaration, a processing instruction, a comment, an internal DTD subset
# (element, attribute-list, notation and entity declarations), a namespaced
# element, a CDATA section, an external entity reference and a reference to
# an undeclared ("skipped") entity.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000057
Guido van Rossumd8faa362007-04-27 19:54:29 +000058
# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the sample document and compare the sequence of callbacks."""

    class Outputter:
        """Handler collection that records every callback in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            # sortdict() gives a repr with the attributes in sorted order,
            # so the recorded text does not depend on dict ordering.
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the parser attributes that _hookup_callbacks() installs.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Compare the recorded operations with the expected sequence."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "End element: 'root'",
        ]
        # NOTE(review): zip() stops at the shorter sequence, so missing
        # trailing operations (or extra ones) are not detected here.
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, 1)

        self._verify_parse_output(out.out)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        stream = BytesIO(data)

        parser.ParseFile(stream)

        self._verify_parse_output(out.out)
Fred Drake1e0611b2000-12-23 22:12:07 +0000227
class NamespaceSeparatorTest(unittest.TestCase):
    """Validate the namespace_separator argument of ParserCreate()."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The argument is identified either by position or by name,
        # depending on the interpreter version, so accept both wordings.
        with self.assertRaisesRegex(
                TypeError,
                r"ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaises(ValueError) as caught:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(caught.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000261
Fred Drake2a3d7db2002-06-28 22:56:48 +0000262
class InterningTest(unittest.TestCase):
    """Exercise the parser's string interning machinery."""

    def test(self):
        # Test the interning machinery.
        parser = expat.ParserCreate()
        names = []
        def collector(name, *args):
            names.append(name)
        parser.StartElementHandler = collector
        parser.EndElementHandler = collector
        parser.Parse("<e> <e/> <e></e> </e>", 1)
        tag = names[0]
        self.assertEqual(len(names), 6)
        for entry in names:
            # names should hold the same string object repeated over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Create a child parser from the buffering parent and feed
                # it an empty final chunk; keep the result for the assertion.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse("", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000297
Guido van Rossumd8faa362007-04-27 19:54:29 +0000298
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text flag groups character data callbacks.

    Every handler appends a textual marker to ``self.stuff`` so each test
    can compare the complete sequence of callbacks.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        # list() is needed so that the message shows the items rather than
        # the repr of a lazy map object.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute toggles buffering from inside a callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the named handler methods of this test on the parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000392
Fred Draked7ea55b2004-08-13 03:09:07 +0000393
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """An exception raised inside a handler must propagate out of Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as caught:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The exception must come from the outermost element 'a', i.e. the
        # first time the handler was invoked.
        found = caught.exception.args[0]
        self.assertEqual(found, 'a',
                         "Expected RuntimeError for element 'a', but"
                         " found %r" % found)
Fred Draked7ea55b2004-08-13 03:09:07 +0000409
Dave Cole3203efb2004-08-26 00:37:31 +0000410
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag and 'e' for an end tag.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # (event, byte index, line number, column number)
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Every expected event must actually have been reported.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000441
442
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        # The document is passed as bytes so that the encoding named in the
        # XML declaration is the one the parser actually applies.
        xml = (b"<?xml version='1.0' encoding='iso8859'?><s>"
               + b'a' * 1025 + b"</s>")
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the handler's exception, not just any error.
        self.assertRaises(SpecificException, parser.Parse, xml)
461
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size

    The documents are passed as bytes so that the encoding named in the XML
    declaration is the one the parser actually applies; the expected
    callback counts were established for that code path.
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a' * 512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        # Turn buffering back on: the size must have been retained.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def make_document(self, bytes):
        """Return a document whose <tag> holds *bytes* 'a' characters."""
        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')

    def counting_handler(self, text):
        # Character data callback: only counts how often it is invoked.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* characters of text with a 1024 byte buffer.

        Returns the number of character data callbacks that were made.
        """
        xml = (b"<?xml version='1.0' encoding='iso8859'?><s>"
               + b'a' * buffer_len + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000585
class MalformedInputTest(unittest.TestCase):
    """Error reporting for input that is not well-formed."""

    def test1(self):
        # Bytes input: the literal spells out raw bytes of the document.
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE); the escapes are raw
        # bytes, so the literal has to be a bytes object.
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # Only the message and the line are pinned; the reported column is
        # allowed to vary.
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000604
class ErrorMessageTest(unittest.TestCase):
    """Consistency of the expat.errors tables and of ExpatError.code."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = '<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(caught.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000620
621
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def _parse_with_foreign_dtd(self, document):
        # Parse *document* with UseForeignDTD(True) and return the list of
        # (public_id, system_id) pairs passed to ExternalEntityRefHandler.
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return handler_call_args

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = self._parse_with_foreign_dtd(
            "<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        handler_call_args = self._parse_with_foreign_dtd(
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(handler_call_args, [("bar", "baz")])
662
663
def test_main():
    """Run every test case class of this module through run_unittest()."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest,
                 ForeignDTDTests)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000677
# Run the whole suite when the file is executed as a script.
if __name__ == "__main__":
    test_main()