blob: 49ac23d3f0096c600fa671c9b7468951c024641c [file] [log] [blame]
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
Fred Drake004d5e62000-10-23 17:22:08 +00003
Guido van Rossum4ca94712007-07-23 17:42:32 +00004from io import BytesIO
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import unittest
6
Fred Drake7fbc85c2000-09-23 04:47:56 +00007from xml.parsers import expat
Georg Brandl91d2a3f2010-10-15 15:25:23 +00008from xml.parsers.expat import errors
Fred Drake004d5e62000-10-23 17:22:08 +00009
Benjamin Petersonee8712c2008-05-20 21:35:26 +000010from test.support import sortdict, run_unittest
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000011
12
class SetAttributeTest(unittest.TestCase):
    """Check how flag-like parser attributes read back after assignment."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')
        # Each pair is [value assigned, value expected when read back];
        # note that assigning 2 is expected to read back as 1.
        self.set_get_pairs = [
            [0, 0],
            [1, 1],
            [2, 1],
            [0, 0],
        ]

    def test_ordered_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.ordered_attributes = assigned
            self.assertEqual(self.parser.ordered_attributes, expected)

    def test_specified_attributes(self):
        for assigned, expected in self.set_get_pairs:
            self.parser.specified_attributes = assigned
            self.assertEqual(self.parser.specified_attributes, expected)
Fred Drake8f42e2b2001-04-25 16:03:54 +000032
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000033
# Sample document shared by the tests below.  It contains an XML
# declaration, a processing instruction, a comment, a DOCTYPE with an
# internal subset, a namespaced element, a CDATA section, an external
# entity reference and a reference to an undeclared entity.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
</root>
'''
Andrew M. Kuchlingb17664d2000-03-31 15:44:52 +000057
Guido van Rossumd8faa362007-04-27 19:54:29 +000058
# Parse the sample document and record every handler callback
class ParseTest(unittest.TestCase):
    """Parse the module-level sample document and check every callback.

    Each handler of the nested ``Outputter`` appends a record to a list;
    the tests compare that list with the full expected sequence.
    """

    class Outputter:
        """Collection of Expat handlers that log their calls to ``out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            # sortdict() is used so the attribute rendering does not depend
            # on dictionary ordering.
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            context, base, sysId, pubId = args
            # The context (first argument) is left out of the record.
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the parser attributes that get an Outputter method attached.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _make_recording_parser(self):
        """Return ``(parser, outputter)`` with every handler attached."""
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))
        return parser, out

    def _verify_parse_output(self, operations):
        """Assert that *operations* is exactly the expected callback log."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "End element: 'root'",
        ]
        # Compare the whole lists: pairing them up with zip() would stop at
        # the shorter one and so would accept missing or extra events.
        self.assertEqual(operations, expected_operations)

    def test_unicode(self):
        # Feed the whole document in a single Parse() call.
        parser, out = self._make_recording_parser()

        parser.Parse(data, 1)

        self._verify_parse_output(out.out)

    def test_parse_file(self):
        # Same document, read through a file-like object.
        parser, out = self._make_recording_parser()
        stream = BytesIO(data)

        parser.ParseFile(stream)

        self._verify_parse_output(out.out)
Fred Drake1e0611b2000-12-23 22:12:07 +0000219
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        with self.assertRaises(TypeError) as caught:
            expat.ParserCreate(namespace_separator=42)
        # How the offending argument is identified (by position or by name)
        # is deliberately left open; only the type complaint is pinned.
        self.assertRegex(
            str(caught.exception),
            r'^ParserCreate\(\) argument .* must be str or None, not int$')

        with self.assertRaises(ValueError) as caught:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(caught.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='')  # too short
Fred Drake2a3d7db2002-06-28 22:56:48 +0000253
Fred Drake2a3d7db2002-06-28 22:56:48 +0000254
class InterningTest(unittest.TestCase):
    """Tests around name interning and external-entity sub-parsers."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        # Three start events and three end events, all for element "e".
        # Check the count first so an empty list fails with a clear message
        # rather than an IndexError.
        self.assertEqual(len(L), 6)
        tag = L[0]
        for entry in L:
            # L should have the same string object repeated over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty entity with a sub-parser and keep the result.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse("", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
289
Guido van Rossumd8faa362007-04-27 19:54:29 +0000290
class BufferTextTest(unittest.TestCase):
    """Check how the ``buffer_text`` attribute groups character data.

    The handlers defined on this class append what they receive to
    ``self.stuff``; each test then compares that list with the expected
    sequence of events.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is put at the start of the failure message.
        """
        # map() has to be turned into a list here, otherwise the message
        # shows the repr of a map object instead of the expected values.
        self.assertEqual(self.stuff, expected,
                         "%s\nstuff    = %r\nexpected = %r"
                         % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute switches buffering from inside a callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Attach the same-named methods of this class to the parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        # With the character data handler removed, only tags are recorded.
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
384
Fred Draked7ea55b2004-08-13 03:09:07 +0000385
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """An exception raised inside a handler must propagate out of Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as caught:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The handler raises with the element name as argument, so the first
        # (outermost) element is the one that must be reported.
        found = caught.exception.args[0]
        self.assertEqual(found, 'a',
                         "Expected RuntimeError for element 'a', but"
                         " found %r" % found)
Fred Draked7ea55b2004-08-13 03:09:07 +0000401
Dave Cole3203efb2004-08-26 00:37:31 +0000402
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the parser's Current* position attributes inside callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag and 'e' for an end tag.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                         'Expected position %s, got position %s'
                         % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # (event, byte index, line number, column number)
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <b> is indented by one space and <c/> by two; the expected
        # offsets above depend on exactly this layout.
        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Every expected event must actually have been delivered.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000433
434
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        # The document is passed as bytes so that the encoding named in its
        # XML declaration is the one in effect.
        # NOTE(review): pyexpat is understood to treat str input as UTF-8
        # regardless of the declaration -- confirm against the pyexpat docs.
        xml = (b"<?xml version='1.0' encoding='iso8859'?><s>"
               + b'a' * 1025 + b"</s>")
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Require the handler's own exception: accepting any Exception would
        # also let an unrelated parser error pass the test.
        self.assertRaises(SpecificException, parser.Parse, xml)
453
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size

    Each test counts how many times the character data handler is called
    for a given combination of ``buffer_text``, ``buffer_size`` and input.

    The documents are passed as bytes so that the encoding named in their
    XML declaration is the one in effect.
    NOTE(review): pyexpat is understood to treat str input as UTF-8
    regardless of the declaration, which changes how character data is
    chunked -- confirm against the pyexpat docs.
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        for bad_size in (-1, 0):
            with self.assertRaises(ValueError):
                parser.buffer_size = bad_size

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a' * 512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        # Turn buffering back on; the configured size must have survived.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def make_document(self, bytes):
        """Return a ``<tag>`` document containing *bytes* 'a' characters."""
        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')

    def counting_handler(self, text):
        """Character data handler that only counts its invocations."""
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* characters with a 1024-character buffer.

        Returns the number of character data handler calls.
        """
        xml = (b"<?xml version='1.0' encoding='iso8859'?><s>"
               + b'a' * buffer_len + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        # Grow the buffer between two chunks of the same document.
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        # Shrink the buffer between two chunks of the same document.
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)
577
class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for malformed byte sequences.

    The inputs are bytes literals: the tests are about specific byte
    sequences, and a str literal with the same escapes would not hand
    those bytes to the parser unchanged.
    """

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is the UTF-8 encoding of U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # Only the kind of error and the line are pinned; the exact column
        # is deliberately left open.
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000596
class ErrorMessageTest(unittest.TestCase):
    """Check the ``errors`` module tables and the ExpatError ``code``."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages:
        # message -> code -> message must give back the same message
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = '<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(caught.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
612
613
def test_main():
    """Run every test case class of this module through run_unittest()."""
    test_classes = (
        SetAttributeTest,
        ParseTest,
        NamespaceSeparatorTest,
        InterningTest,
        BufferTextTest,
        HandlerExceptionTest,
        PositionTest,
        sf1296433Test,
        ChardataBufferTest,
        MalformedInputTest,
        ErrorMessageTest,
    )
    run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()