blob: 5bb8c9718f05b87638db56990be7906a254de283 [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import unittest
6
Fred Drake7fbc85c2000-09-23 04:47:56 +00007from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +00008from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +00009
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000011
12
class SetAttributeTest(unittest.TestCase):
    """Check that the boolean-like parser attributes normalise what is set."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # (value assigned, value expected when read back); any non-zero
        # value is expected to read back as 1.
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_ordered_attributes(self):
        for x, y in self.set_get_pairs:
            # subTest makes a failure report which pair went wrong.
            with self.subTest(assigned=x):
                self.parser.ordered_attributes = x
                self.assertEqual(self.parser.ordered_attributes, y)

    def test_specified_attributes(self):
        for x, y in self.set_get_pairs:
            with self.subTest(assigned=x):
                self.parser.specified_attributes = x
                self.assertEqual(self.parser.specified_attributes, y)
Fred Drake8f42e2b2001-04-25 16:03:54 +000032
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000033
# Sample document shared by the parsing tests.  It contains an XML
# declaration, a processing instruction, a comment, an internal DTD subset
# (element, attribute-list, notation and entity declarations), a namespaced
# element, a CDATA section, an external entity reference and a reference to
# an entity that is never declared.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000057
Guido van Rossumd8faa362007-04-27 19:54:29 +000058
# Parse the sample document and record what every handler reports.
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check handler events."""

    class Outputter:
        """Record one entry in ``self.out`` for every handler callback."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            # sortdict() renders the attributes in sorted key order.
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only runs between elements are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Every Outputter method that gets installed on the parser.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
    ]

    def _verify_parse_output(self, operations):
        """Compare recorded *operations* with the events expected for ``data``."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "End element: 'root'",
        ]
        # zip() stops at the shorter sequence, so without this check a parser
        # that reported too few (or no) events would pass unnoticed.
        self.assertGreaterEqual(
            len(operations), len(expected_operations),
            "parser reported fewer events than expected: %r" % (operations,))
        for index, (operation, expected_operation) in enumerate(
                zip(operations, expected_operations)):
            self.assertEqual(operation, expected_operation,
                             "event #%d differs" % index)

    def test_unicode(self):
        # Parse the document in one call; handlers receive str values.
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))

        parser.Parse(data, 1)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)
Fred Drake1e0611b2000-12-23 22:12:07 +0000221
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which ``namespace_separator`` values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The argument is identified by position ("argument 2") or by name
        # depending on the interpreter version, so accept either spelling.
        with self.assertRaisesRegex(
                TypeError,
                r"ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaisesRegex(
                ValueError,
                'namespace_separator must be at most one character, '
                'omitted, or None'):
            expat.ParserCreate(namespace_separator='too long')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='')  # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255
Fred Drake2a3d7db2002-06-28 22:56:48 +0000256
class InterningTest(unittest.TestCase):
    """Check element-name interning and an external-parser regression."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        tag = L[0]
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string object repeated over and over;
            # assertIs reports both objects if the identity check fails.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse("", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        # ``data`` is the module-level sample document; it contains an
        # external entity reference, which triggers the handler above.
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000291
Guido van Rossumd8faa362007-04-27 19:54:29 +0000292
class BufferTextTest(unittest.TestCase):
    """Check how ``buffer_text`` groups character data between callbacks.

    Every handler appends to ``self.stuff``; the tests compare that list
    with the expected sequence of text chunks and markup markers.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        # list() is needed: formatting a bare map object would only show
        # "<map object at 0x...>" in the failure message.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute lets the document toggle buffering
        # while it is being parsed.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the named methods of this test case on the parser."""
        # Immutable default: a shared list default could be mutated by accident.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000386
Fred Draked7ea55b2004-08-13 03:09:07 +0000387
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a handler reaches the caller."""

    def StartElementHandler(self, name, attrs):
        # Carry the element name so the test can tell which call raised.
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as caught:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The error must come from the first start tag, not a later one.
        found = caught.exception.args[0]
        self.assertEqual(found, 'a',
                         "Expected RuntimeError for element 'a', but" +
                         " found %r" % found)
Fred Draked7ea55b2004-08-13 03:09:07 +0000403
Dave Cole3203efb2004-08-26 00:37:31 +0000404
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser position for *event* with the next expected one."""
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The message names the expected value first, then the observed one.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # (event, byte index, line number, column number)
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <b> is indented by one space and <c/> by two; the expected byte
        # indexes and columns above depend on that exact whitespace.
        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Without this, a parser that reported too few events would pass.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000435
436
class sf1296433Test(unittest.TestCase):
    """Regression test: an exception raised by a character data handler."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        # The document is passed as bytes so that the encoding named in the
        # XML declaration is the one the parser uses; a str document is
        # re-encoded as UTF-8 before parsing, which would bypass it.
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 1025 + b"</s>"
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the handler's exception: a bare ``Exception`` would
        # also accept unrelated failures such as a parse error.
        self.assertRaises(SpecificException, parser.Parse, xml)
455
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size

    Documents are passed to the parser as bytes so that the encoding named
    in their XML declaration is honoured; a str document is re-encoded as
    UTF-8 before parsing, which changes how the character data is chunked
    and therefore how often the handler is called.
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Non-positive buffer sizes must be rejected.
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a' * 512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        # Turn buffering back on; the configured size must have survived.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def make_document(self, bytes):
        # Build a document holding *bytes* 'a' characters of text.
        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')

    def counting_handler(self, text):
        # Count handler invocations; the text itself is ignored.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Return how often the handler runs for *buffer_len* characters
        of text with a 1024-character buffer."""
        xml = (b"<?xml version='1.0' encoding='iso8859'?><s>"
               + b'a' * buffer_len + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        # Grow the buffer between the two chunks.
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        # Shrink the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)
Christian Heimes2380ac72008-01-09 00:17:24 +0000579
class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for malformed input.

    The inputs are bytes so that the parser sees exactly the byte sequences
    under test rather than a re-encoding of a str.
    """

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is the UTF-8 encoding of U+0085; written inside a str
        # literal it would instead be two separate code points.
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # Only the error kind and line are pinned; the column is matched
        # loosely.
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000598
class ErrorMessageTest(unittest.TestCase):
    """Check the ``errors.codes`` / ``errors.messages`` tables and
    ``ExpatError.code``."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = '<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        # A lone '<' at end of input is an unclosed token.
        self.assertEqual(caught.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
Georg Brandl91d2a3f2010-10-15 15:25:23 +0000614
615
def test_main():
    """Run every test case class defined in this module."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputTest,
                 ErrorMessageTest)


if __name__ == "__main__":
    test_main()