Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 1 | # XXX TypeErrors on calling handlers, or on bad return values from a |
| 2 | # handler, are obscure and unhelpful. |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 3 | |
Guido van Rossum | 4ca9471 | 2007-07-23 17:42:32 +0000 | [diff] [blame] | 4 | from io import BytesIO |
Antoine Pitrou | 0ddbf47 | 2014-10-08 20:00:09 +0200 | [diff] [blame] | 5 | import os |
Paul Monson | f355069 | 2019-06-19 13:09:54 -0700 | [diff] [blame] | 6 | import platform |
Serhiy Storchaka | de5f9f4 | 2015-09-07 22:51:56 +0300 | [diff] [blame] | 7 | import sys |
Antoine Pitrou | 2b3b95b | 2014-11-29 15:56:07 +0100 | [diff] [blame] | 8 | import sysconfig |
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 9 | import unittest |
Antoine Pitrou | 0ddbf47 | 2014-10-08 20:00:09 +0200 | [diff] [blame] | 10 | import traceback |
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 11 | |
Fred Drake | 7fbc85c | 2000-09-23 04:47:56 +0000 | [diff] [blame] | 12 | from xml.parsers import expat |
Georg Brandl | 91d2a3f | 2010-10-15 15:25:23 +0000 | [diff] [blame] | 13 | from xml.parsers.expat import errors |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 14 | |
Zachary Ware | 38c707e | 2015-04-13 15:00:43 -0500 | [diff] [blame] | 15 | from test.support import sortdict |
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 16 | |
| 17 | |
class SetAttributeTest(unittest.TestCase):
    """Check the settable option attributes of an xmlparser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_boolean_attribute(self, attr_name):
        """Assert that *attr_name* starts as False and reads back as a bool.

        Each assigned value (0, 1, 2, 0) must be reported as the exact
        ``bool`` singleton corresponding to its truthiness.
        """
        self.assertIs(getattr(self.parser, attr_name), False)
        for value in (0, 1, 2, 0):
            setattr(self.parser, attr_name, value)
            self.assertIs(getattr(self.parser, attr_name), bool(value))

    def test_buffer_text(self):
        self._check_boolean_attribute('buffer_text')

    def test_namespace_prefixes(self):
        self._check_boolean_attribute('namespace_prefixes')

    def test_ordered_attributes(self):
        self._check_boolean_attribute('ordered_attributes')

    def test_specified_attributes(self):
        self._check_boolean_attribute('specified_attributes')

    def test_invalid_attributes(self):
        # 'returns_unicode' can be neither written nor read.
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode

        # Issue #25019: a non-string attribute name must raise TypeError.
        bad_name = range(0xF)
        with self.assertRaises(TypeError):
            setattr(self.parser, bad_name, 0)
        with self.assertRaises(TypeError):
            self.parser.__setattr__(bad_name, 0)
        with self.assertRaises(TypeError):
            getattr(self.parser, bad_name)
Fred Drake | 8f42e2b | 2001-04-25 16:03:54 +0000 | [diff] [blame] | 56 | |
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 57 | |
# Sample document shared by the parsing tests in this file.  Characters
# outside ASCII are spelled as numeric character references (or as the
# \xb5 byte escape), because a bytes literal may only contain ASCII text.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 82 | |
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 83 | |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 84 | # Produce UTF-8 output |
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check the callback log."""

    class Outputter:
        """Expat callbacks that record every event they receive in ``out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append(
                ' '.join(('Start element:', repr(name), sortdict(attrs))))

        def EndElementHandler(self, name):
            self.out.append('End element: %r' % (name,))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not logged.
            stripped = data.strip()
            if not stripped:
                return
            self.out.append('Character data: %r' % (stripped,))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: %r %r' % (target, data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: %r %r' % (prefix, uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: %r' % (prefix,))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: %r' % (text,))

        def NotationDeclHandler(self, *args):
            # The unpacking raises unless exactly four arguments arrive.
            _name, _base, _sysid, _pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking raises unless exactly five arguments arrive.
            _entity, _base, _system_id, _public_id, _notation = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking raises unless exactly four arguments arrive;
            # the first one (the context) is left out of the log entry.
            _context, _base, _sys_id, _pub_id = args
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the parser attributes that _hookup_callbacks() installs.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
    ]

    def _hookup_callbacks(self, parser, handler):
        """
        Install on parser every callback of handler whose name is listed
        in self.handler_names.
        """
        for callback_name in self.handler_names:
            callback = getattr(handler, callback_name)
            setattr(parser, callback_name, callback)

    def _verify_parse_output(self, operations):
        """Compare the recorded operations, pairwise, with the expected log."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # NOTE(review): zip() stops at the shorter sequence, so a log that
        # ends early (or carries extra trailing entries) is not reported.
        for actual, wanted in zip(operations, expected_operations):
            self.assertEqual(actual, wanted)

    def test_parse_bytes(self):
        recorder = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, recorder)

        parser.Parse(data, True)

        self._verify_parse_output(recorder.out)
        # Issue #6697.
        with self.assertRaises(AttributeError):
            getattr(parser, '\uD800')

    def test_parse_str(self):
        recorder = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, recorder)

        text = data.decode('iso-8859-1')
        parser.Parse(text, True)

        self._verify_parse_output(recorder.out)

    def test_parse_file(self):
        # Same document, but handed over as a binary file object.
        recorder = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, recorder)
        stream = BytesIO(data)

        parser.ParseFile(stream)

        self._verify_parse_output(recorder.out)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        stream = BytesIO(data)
        parser.ParseFile(stream)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(stream)
        reported = expat.ErrorString(cm.exception.code)
        self.assertEqual(reported, expat.errors.XML_ERROR_FINISHED)
| 274 | |
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        for kwargs in ({},
                       {'namespace_separator': None},
                       {'namespace_separator': ' '}):
            expat.ParserCreate(**kwargs)

    def test_illegal(self):
        # A non-string separator is a TypeError with a fixed message.
        with self.assertRaises(TypeError) as caught:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(
            str(caught.exception),
            "ParserCreate() argument 'namespace_separator' must be str or None, not int")

        # A separator longer than one character is a ValueError.
        with self.assertRaises(ValueError) as caught:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(
            str(caught.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        empty_separator = ''
        expat.ParserCreate(namespace_separator=empty_separator)
Fred Drake | 2a3d7db | 2002-06-28 22:56:48 +0000 | [diff] [blame] | 308 | |
Fred Drake | 2a3d7db | 2002-06-28 22:56:48 +0000 | [diff] [blame] | 309 | |
class InterningTest(unittest.TestCase):
    """Tests around the parser's handling of repeated element names."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        names = []

        def collector(name, *args):
            names.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", True)
        # Check the count first, so that a parser which reported nothing
        # fails with an assertion instead of an IndexError below.
        self.assertEqual(len(names), 6)
        tag = names[0]
        for entry in names:
            # Every callback should have received the very same str object.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty document with a sub-parser and keep the
                # value returned by Parse() for the assertion below.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", True)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, True)
        self.assertEqual(out.parser_result, 1)
Victor Stinner | b4ba986 | 2010-09-10 22:25:19 +0000 | [diff] [blame] | 344 | |
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 345 | |
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text option groups character data callbacks.

    The test case instance doubles as the handler object: its callbacks
    append what they receive to ``self.stuff``.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is included in the failure message.
        """
        # list() is required: a bare map object would be rendered as
        # '<map object at 0x...>' instead of the expected items.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A 'buffer-text' attribute switches buffering while parsing.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the methods of this object named in *handlers* on the parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # '<' and '>' have to be escaped in the document; the handler is
        # expected to receive them as plain characters in a single chunk.
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/> \n 3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        # With the character data callback removed only tags are recorded.
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 439 | |
Fred Drake | d7ea55b | 2004-08-13 03:09:07 +0000 | [diff] [blame] | 440 | |
| 441 | # Test handling of exception from callback: |
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a callback reaches the caller."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        """Assert that a traceback entry names the given file and function."""
        entry_file, entry_func = entry[0], entry[2]
        self.assertEqual(os.path.basename(entry_file), filename)
        self.assertEqual(entry_func, funcname)

    def test_exception(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # A plain try/except keeps the live exception object, whose
        # __traceback__ is examined below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", True)
            self.fail()
        except RuntimeError as e:
            element = e.args[0]
            self.assertEqual(
                element, 'a',
                "Expected RuntimeError for element 'a', but found %r" % element)
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            expected_frames = (
                ("test_pyexpat.py", "test_exception"),
                ("pyexpat.c", "StartElement"),
                ("test_pyexpat.py", "StartElementHandler"),
            )
            for entry, (filename, funcname) in zip(entries, expected_frames):
                self.check_traceback_entry(entry, filename, funcname)
            # The source line is only checked for an in-tree build, and not
            # on the win32/ARM combination.
            if sysconfig.is_python_build() and (
                    sys.platform != 'win32' or platform.machine() != 'ARM'):
                self.assertIn('call_with_frame("StartElement"', entries[1][3])
Fred Drake | d7ea55b | 2004-08-13 03:09:07 +0000 | [diff] [blame] | 471 | |
Dave Cole | 3203efb | 2004-08-26 00:37:31 +0000 | [diff] [blame] | 472 | |
| 473 | # Test Current* members: |
class PositionTest(unittest.TestCase):
    """Check the Current* position attributes seen from element callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag and 'e' for an end tag.  The
        position is the tuple (event, byte index, line number, column
        number); ``self.upto`` counts the events checked so far.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The expected value goes first, to match the wording of the message.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # The indentation inside the document is significant: the expected
        # byte offsets and column numbers above depend on it.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)
        # Every expected event must actually have been delivered.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 503 | |
| 504 | |
class sf1296433Test(unittest.TestCase):
    """Regression test for the issue tracked at python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        # 1025 bytes of character data in a document that declares the
        # 'iso8859' encoding, parsed with a CharacterDataHandler that raises.
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Require the handler's own exception type: asserting on the bare
        # Exception base class would also accept an unrelated failure
        # (for example an ExpatError) and hide a real regression.
        self.assertRaises(SpecificException,
                          parser.Parse, xml.encode('iso8859'))
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 523 | |
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    # Helpers shared by the tests below.

    def counting_handler(self, text):
        # CharacterDataHandler that only counts its invocations in self.n.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Parse an <s> element holding buffer_len 'a' bytes with buffering
        # enabled and a 1024 byte buffer; return the number of handler calls.
        prolog = b"<?xml version='1.0' encoding='iso8859'?><s>"
        document = prolog + b'a' * buffer_len + b'</s>'
        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_size = 1024
        p.buffer_text = 1

        self.n = 0
        p.Parse(document)
        return self.n

    # Tests.

    def test_1025_bytes(self):
        calls = self.small_buffer_test(1025)
        self.assertEqual(calls, 2)

    def test_1000_bytes(self):
        calls = self.small_buffer_test(1000)
        self.assertEqual(calls, 1)

    def test_wrong_size(self):
        p = expat.ParserCreate()
        p.buffer_text = 1
        # Non-positive sizes are rejected with ValueError.
        for bad_size in (-1, 0):
            with self.assertRaises(ValueError):
                p.buffer_size = bad_size
        # A size beyond sys.maxsize may fail with either exception type.
        too_big = sys.maxsize + 1
        with self.assertRaises((ValueError, OverflowError)):
            p.buffer_size = too_big
        # A float is not accepted as a size.
        with self.assertRaises(TypeError):
            p.buffer_size = 512.0

    def test_unchanged_size(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        tail = b'a' * 512 + b'</s>'
        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_size = 512
        p.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        p.Parse(head)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        current_size = p.buffer_size
        p.buffer_size = current_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        p.Parse(tail)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        opening = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        middle = b'b' * 1024
        closing = b'c' * 1024 + b'</a>'
        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_text = 1
        p.buffer_size = 1024
        self.assertEqual(p.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        p.Parse(opening, False)
        self.assertEqual(p.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        p.buffer_text = 0
        self.assertFalse(p.buffer_text)
        self.assertEqual(p.buffer_size, 1024)
        for _ in range(10):
            p.Parse(middle, False)
        self.assertEqual(self.n, 11)

        # Turn buffering back on for the final chunk.
        p.buffer_text = 1
        self.assertTrue(p.buffer_text)
        self.assertEqual(p.buffer_size, 1024)
        p.Parse(closing, True)
        self.assertEqual(self.n, 12)

    def test_change_size_1(self):
        # Double the buffer size between two Parse() calls.
        first = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        second = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_text = 1
        p.buffer_size = 1024
        self.assertEqual(p.buffer_size, 1024)

        self.n = 0
        p.Parse(first, False)
        p.buffer_size = p.buffer_size * 2
        self.assertEqual(p.buffer_size, 2048)
        p.Parse(second, True)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        # Halve the buffer size between two Parse() calls.
        first = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        second = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        p = expat.ParserCreate()
        p.CharacterDataHandler = self.counting_handler
        p.buffer_text = 1
        p.buffer_size = 2048
        self.assertEqual(p.buffer_size, 2048)

        self.n = 0
        p.Parse(first, False)
        halved = p.buffer_size // 2
        p.buffer_size = halved
        self.assertEqual(p.buffer_size, 1024)
        p.Parse(second, True)
        self.assertEqual(self.n, 4)
Christian Heimes | 2380ac7 | 2008-01-09 00:17:24 +0000 | [diff] [blame] | 645 | |
class MalformedInputTest(unittest.TestCase):
    """Check the error text reported for input that is not well-formed."""

    def test1(self):
        # A NUL byte followed by CRLF must be reported as an unclosed token.
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(b"\0\r\n", True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        data = b"<?xml version\xc2\x85='1.0'?>\r\n"
        expected_message = (
            r'XML declaration not well-formed: line 1, column \d+')
        parser = expat.ParserCreate()
        self.assertRaisesRegex(expat.ExpatError, expected_message,
                               parser.Parse, data, True)
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 663 | |
class ErrorMessageTest(unittest.TestCase):
    """Check the errors.codes / errors.messages lookup tables."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages:
        # message -> code -> message must give back the same message.
        message = errors.XML_ERROR_SYNTAX
        code = errors.codes[message]
        self.assertEqual(message, errors.messages[code])

    def test_expaterror(self):
        # A lone '<' is an unclosed token; the raised ExpatError must carry
        # the matching numeric code.
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(b'<', True)
        unclosed_token_code = errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]
        self.assertEqual(caught.exception.code, unclosed_token_code)
Georg Brandl | 91d2a3f | 2010-10-15 15:25:23 +0000 | [diff] [blame] | 679 | |
| 680 | |
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """

    def _external_entity_refs(self, document, *use_foreign_dtd_args):
        # Parse *document* with a fresh parser on which
        # UseForeignDTD(*use_foreign_dtd_args) was called, and return the
        # (public_id, system_id) pairs passed to ExternalEntityRefHandler.
        seen = []

        def resolve_entity(context, base, system_id, public_id):
            seen.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*use_foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return seen

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._external_entity_refs(document, True),
                         [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._external_entity_refs(document),
                         [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._external_entity_refs(document, True),
                         [("bar", "baz")])
| 731 | |
| 732 | |
# Run the test cases defined in this module when the file is executed
# directly as a script.
if __name__ == "__main__":
    unittest.main()