Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 1 | # XXX TypeErrors on calling handlers, or on bad return values from a |
| 2 | # handler, are obscure and unhelpful. |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 3 | |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 4 | import StringIO, sys |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 5 | import unittest |
| 6 | |
Fred Drake | 7fbc85c | 2000-09-23 04:47:56 +0000 | [diff] [blame] | 7 | from xml.parsers import expat |
Fred Drake | 004d5e6 | 2000-10-23 17:22:08 +0000 | [diff] [blame] | 8 | |
Ezio Melotti | 8b4367e | 2011-04-11 03:44:28 +0300 | [diff] [blame] | 9 | from test import test_support |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 10 | from test.test_support import sortdict, run_unittest |
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 11 | |
| 12 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 13 | class SetAttributeTest(unittest.TestCase): |
| 14 | def setUp(self): |
| 15 | self.parser = expat.ParserCreate(namespace_separator='!') |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 16 | |
| 17 | def test_buffer_text(self): |
| 18 | self.assertIs(self.parser.buffer_text, False) |
| 19 | for x in 0, 1, 2, 0: |
| 20 | self.parser.buffer_text = x |
| 21 | self.assertIs(self.parser.buffer_text, bool(x)) |
| 22 | |
| 23 | def test_namespace_prefixes(self): |
| 24 | self.assertIs(self.parser.namespace_prefixes, False) |
| 25 | for x in 0, 1, 2, 0: |
| 26 | self.parser.namespace_prefixes = x |
| 27 | self.assertIs(self.parser.namespace_prefixes, bool(x)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 28 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 29 | def test_returns_unicode(self): |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 30 | self.assertIs(self.parser.returns_unicode, test_support.have_unicode) |
| 31 | for x in 0, 1, 2, 0: |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 32 | self.parser.returns_unicode = x |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 33 | self.assertIs(self.parser.returns_unicode, bool(x)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 34 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 35 | def test_ordered_attributes(self): |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 36 | self.assertIs(self.parser.ordered_attributes, False) |
| 37 | for x in 0, 1, 2, 0: |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 38 | self.parser.ordered_attributes = x |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 39 | self.assertIs(self.parser.ordered_attributes, bool(x)) |
Fred Drake | 265a804 | 2000-09-21 20:32:13 +0000 | [diff] [blame] | 40 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 41 | def test_specified_attributes(self): |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 42 | self.assertIs(self.parser.specified_attributes, False) |
| 43 | for x in 0, 1, 2, 0: |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 44 | self.parser.specified_attributes = x |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 45 | self.assertIs(self.parser.specified_attributes, bool(x)) |
| 46 | |
| 47 | def test_invalid_attributes(self): |
| 48 | with self.assertRaises(AttributeError): |
| 49 | self.parser.foo = 1 |
| 50 | with self.assertRaises(AttributeError): |
| 51 | self.parser.foo |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 52 | |
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 53 | |
# XML document shared by the ParseTest cases.
#
# Non-ASCII characters are written as numeric character references so that
# this source file stays pure ASCII and the bytes remain valid under the
# iso-8859-1 encoding the document declares.  &#8000; is U+1F40, which the
# expected outputs in ParseTest spell as '\xe1\xbd\x80' (UTF-8) or u'\u1f40'.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 75 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 76 | |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 77 | # Produce UTF-8 output |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 78 | class ParseTest(unittest.TestCase): |
| 79 | class Outputter: |
| 80 | def __init__(self): |
| 81 | self.out = [] |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 82 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 83 | def StartElementHandler(self, name, attrs): |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 84 | self.out.append('Start element: ' + repr(name) + ' ' + |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 85 | sortdict(attrs)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 86 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 87 | def EndElementHandler(self, name): |
| 88 | self.out.append('End element: ' + repr(name)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 89 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 90 | def CharacterDataHandler(self, data): |
| 91 | data = data.strip() |
| 92 | if data: |
| 93 | self.out.append('Character data: ' + repr(data)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 94 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 95 | def ProcessingInstructionHandler(self, target, data): |
| 96 | self.out.append('PI: ' + repr(target) + ' ' + repr(data)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 97 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 98 | def StartNamespaceDeclHandler(self, prefix, uri): |
| 99 | self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 100 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 101 | def EndNamespaceDeclHandler(self, prefix): |
| 102 | self.out.append('End of NS decl: ' + repr(prefix)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 103 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 104 | def StartCdataSectionHandler(self): |
| 105 | self.out.append('Start of CDATA section') |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 106 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 107 | def EndCdataSectionHandler(self): |
| 108 | self.out.append('End of CDATA section') |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 109 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 110 | def CommentHandler(self, text): |
| 111 | self.out.append('Comment: ' + repr(text)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 112 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 113 | def NotationDeclHandler(self, *args): |
| 114 | name, base, sysid, pubid = args |
| 115 | self.out.append('Notation declared: %s' %(args,)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 116 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 117 | def UnparsedEntityDeclHandler(self, *args): |
| 118 | entityName, base, systemId, publicId, notationName = args |
| 119 | self.out.append('Unparsed entity decl: %s' %(args,)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 120 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 121 | def NotStandaloneHandler(self, userData): |
| 122 | self.out.append('Not standalone') |
| 123 | return 1 |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 124 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 125 | def ExternalEntityRefHandler(self, *args): |
| 126 | context, base, sysId, pubId = args |
| 127 | self.out.append('External entity ref: %s' %(args[1:],)) |
| 128 | return 1 |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 129 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 130 | def DefaultHandler(self, userData): |
| 131 | pass |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 132 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 133 | def DefaultHandlerExpand(self, userData): |
| 134 | pass |
Andrew M. Kuchling | b17664d | 2000-03-31 15:44:52 +0000 | [diff] [blame] | 135 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 136 | handler_names = [ |
| 137 | 'StartElementHandler', 'EndElementHandler', |
| 138 | 'CharacterDataHandler', 'ProcessingInstructionHandler', |
| 139 | 'UnparsedEntityDeclHandler', 'NotationDeclHandler', |
| 140 | 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', |
| 141 | 'CommentHandler', 'StartCdataSectionHandler', |
| 142 | 'EndCdataSectionHandler', |
| 143 | 'DefaultHandler', 'DefaultHandlerExpand', |
| 144 | #'NotStandaloneHandler', |
| 145 | 'ExternalEntityRefHandler' |
| 146 | ] |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 147 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 148 | def test_utf8(self): |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 149 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 150 | out = self.Outputter() |
| 151 | parser = expat.ParserCreate(namespace_separator='!') |
| 152 | for name in self.handler_names: |
| 153 | setattr(parser, name, getattr(out, name)) |
| 154 | parser.returns_unicode = 0 |
| 155 | parser.Parse(data, 1) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 156 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 157 | # Verify output |
| 158 | op = out.out |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 159 | self.assertEqual(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'') |
| 160 | self.assertEqual(op[1], "Comment: ' comment data '") |
| 161 | self.assertEqual(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)") |
| 162 | self.assertEqual(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')") |
| 163 | self.assertEqual(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}") |
| 164 | self.assertEqual(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'") |
| 165 | self.assertEqual(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}") |
| 166 | self.assertEqual(op[7], "Character data: 'Contents of subelements'") |
| 167 | self.assertEqual(op[8], "End element: 'http://www.python.org/namespace!subelement'") |
| 168 | self.assertEqual(op[9], "End of NS decl: 'myns'") |
| 169 | self.assertEqual(op[10], "Start element: 'sub2' {}") |
| 170 | self.assertEqual(op[11], 'Start of CDATA section') |
| 171 | self.assertEqual(op[12], "Character data: 'contents of CDATA section'") |
| 172 | self.assertEqual(op[13], 'End of CDATA section') |
| 173 | self.assertEqual(op[14], "End element: 'sub2'") |
| 174 | self.assertEqual(op[15], "External entity ref: (None, 'entity.file', None)") |
| 175 | self.assertEqual(op[16], "End element: 'root'") |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 176 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 177 | def test_unicode(self): |
| 178 | # Try the parse again, this time producing Unicode output |
| 179 | out = self.Outputter() |
| 180 | parser = expat.ParserCreate(namespace_separator='!') |
| 181 | parser.returns_unicode = 1 |
| 182 | for name in self.handler_names: |
| 183 | setattr(parser, name, getattr(out, name)) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 184 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 185 | parser.Parse(data, 1) |
Andrew M. Kuchling | 7fd7e36 | 2000-06-27 00:37:25 +0000 | [diff] [blame] | 186 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 187 | op = out.out |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 188 | self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') |
| 189 | self.assertEqual(op[1], "Comment: u' comment data '") |
| 190 | self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") |
| 191 | self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") |
| 192 | self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") |
| 193 | self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") |
| 194 | self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") |
| 195 | self.assertEqual(op[7], "Character data: u'Contents of subelements'") |
| 196 | self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'") |
| 197 | self.assertEqual(op[9], "End of NS decl: u'myns'") |
| 198 | self.assertEqual(op[10], "Start element: u'sub2' {}") |
| 199 | self.assertEqual(op[11], 'Start of CDATA section') |
| 200 | self.assertEqual(op[12], "Character data: u'contents of CDATA section'") |
| 201 | self.assertEqual(op[13], 'End of CDATA section') |
| 202 | self.assertEqual(op[14], "End element: u'sub2'") |
| 203 | self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)") |
| 204 | self.assertEqual(op[16], "End element: u'root'") |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 205 | |
| 206 | def test_parse_file(self): |
| 207 | # Try parsing a file |
| 208 | out = self.Outputter() |
| 209 | parser = expat.ParserCreate(namespace_separator='!') |
| 210 | parser.returns_unicode = 1 |
| 211 | for name in self.handler_names: |
| 212 | setattr(parser, name, getattr(out, name)) |
| 213 | file = StringIO.StringIO(data) |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 214 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 215 | parser.ParseFile(file) |
| 216 | |
| 217 | op = out.out |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 218 | self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') |
| 219 | self.assertEqual(op[1], "Comment: u' comment data '") |
| 220 | self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") |
| 221 | self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") |
| 222 | self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") |
| 223 | self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") |
| 224 | self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") |
| 225 | self.assertEqual(op[7], "Character data: u'Contents of subelements'") |
| 226 | self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'") |
| 227 | self.assertEqual(op[9], "End of NS decl: u'myns'") |
| 228 | self.assertEqual(op[10], "Start element: u'sub2' {}") |
| 229 | self.assertEqual(op[11], 'Start of CDATA section') |
| 230 | self.assertEqual(op[12], "Character data: u'contents of CDATA section'") |
| 231 | self.assertEqual(op[13], 'End of CDATA section') |
| 232 | self.assertEqual(op[14], "End element: u'sub2'") |
| 233 | self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)") |
| 234 | self.assertEqual(op[16], "End element: u'root'") |
Fred Drake | 1e0611b | 2000-12-23 22:12:07 +0000 | [diff] [blame] | 235 | |
Ezio Melotti | 8b4367e | 2011-04-11 03:44:28 +0300 | [diff] [blame] | 236 | # Issue 4877: expat.ParseFile causes segfault on a closed file. |
| 237 | fp = open(test_support.TESTFN, 'wb') |
| 238 | try: |
| 239 | fp.close() |
| 240 | parser = expat.ParserCreate() |
| 241 | with self.assertRaises(ValueError): |
| 242 | parser.ParseFile(fp) |
| 243 | finally: |
| 244 | test_support.unlink(test_support.TESTFN) |
| 245 | |
Ned Deily | b693e9f | 2014-03-27 16:38:32 -0700 | [diff] [blame] | 246 | def test_parse_again(self): |
| 247 | parser = expat.ParserCreate() |
| 248 | file = StringIO.StringIO(data) |
| 249 | parser.ParseFile(file) |
| 250 | # Issue 6676: ensure a meaningful exception is raised when attempting |
| 251 | # to parse more than one XML document per xmlparser instance, |
| 252 | # a limitation of the Expat library. |
| 253 | with self.assertRaises(expat.error) as cm: |
| 254 | parser.ParseFile(file) |
| 255 | self.assertEqual(expat.ErrorString(cm.exception.code), |
| 256 | expat.errors.XML_ERROR_FINISHED) |
Fred Drake | 1e0611b | 2000-12-23 22:12:07 +0000 | [diff] [blame] | 257 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 258 | class NamespaceSeparatorTest(unittest.TestCase): |
| 259 | def test_legal(self): |
| 260 | # Tests that make sure we get errors when the namespace_separator value |
| 261 | # is illegal, and that we don't for good values: |
| 262 | expat.ParserCreate() |
| 263 | expat.ParserCreate(namespace_separator=None) |
| 264 | expat.ParserCreate(namespace_separator=' ') |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 265 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 266 | def test_illegal(self): |
| 267 | try: |
| 268 | expat.ParserCreate(namespace_separator=42) |
| 269 | self.fail() |
| 270 | except TypeError, e: |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 271 | self.assertEqual(str(e), |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 272 | 'ParserCreate() argument 2 must be string or None, not int') |
Neal Norwitz | 0d4c06e | 2007-04-25 06:30:05 +0000 | [diff] [blame] | 273 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 274 | try: |
| 275 | expat.ParserCreate(namespace_separator='too long') |
| 276 | self.fail() |
| 277 | except ValueError, e: |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 278 | self.assertEqual(str(e), |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 279 | 'namespace_separator must be at most one character, omitted, or None') |
Fred Drake | 8f42e2b | 2001-04-25 16:03:54 +0000 | [diff] [blame] | 280 | |
Collin Winter | d28fcbc | 2007-03-28 23:34:06 +0000 | [diff] [blame] | 281 | def test_zero_length(self): |
| 282 | # ParserCreate() needs to accept a namespace_separator of zero length |
| 283 | # to satisfy the requirements of RDF applications that are required |
| 284 | # to simply glue together the namespace URI and the localname. Though |
| 285 | # considered a wart of the RDF specifications, it needs to be supported. |
| 286 | # |
| 287 | # See XML-SIG mailing list thread starting with |
| 288 | # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html |
| 289 | # |
| 290 | expat.ParserCreate(namespace_separator='') # too short |
Fred Drake | 8f42e2b | 2001-04-25 16:03:54 +0000 | [diff] [blame] | 291 | |
Fred Drake | 1add023 | 2002-06-27 19:41:51 +0000 | [diff] [blame] | 292 | |
class InterningTest(unittest.TestCase):
    """Check that repeated element names are delivered as one string object."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            # Used for both start and end events; only the name is recorded.
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        # Three <e> elements, each producing one start and one end event.
        self.assertEqual(len(L), 6)
        tag = L[0]
        for entry in L:
            # L should have the same string repeated over and over: identity,
            # not mere equality, is what is being checked.
            self.assertIs(tag, entry)
Fred Drake | 2a3d7db | 2002-06-28 22:56:48 +0000 | [diff] [blame] | 308 | |
Fred Drake | 2a3d7db | 2002-06-28 22:56:48 +0000 | [diff] [blame] | 309 | |
class BufferTextTest(unittest.TestCase):
    """Check how ``buffer_text`` groups character data between other events.

    setUp() creates a parser with buffer_text enabled and only the character
    data handler installed; individual tests hook additional handlers with
    setHandlers().  Every handler appends to ``self.stuff``, which the tests
    compare against the expected sequence.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*; *label* prefixes
        the failure message."""
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute of "yes"/"no" switches buffering from
        # inside the callback; any other value leaves it untouched.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install this test case's method of each given name on the parser.

        handlers -- iterable of handler attribute names; defaults to none.
                    (An immutable default avoids sharing one list object
                    between calls.)
        """
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # The markup characters and spaces are written as entity/character
        # references: a literal "<2>" would not be well-formed XML.  After
        # expansion they must be merged with the surrounding text.
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        # With the character data handler removed, only element events are
        # recorded.
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
Fred Drake | 2a3d7db | 2002-06-28 22:56:48 +0000 | [diff] [blame] | 403 | |
Fred Drake | d7ea55b | 2004-08-13 03:09:07 +0000 | [diff] [blame] | 404 | |
| 405 | # Test handling of exception from callback: |
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a handler escapes from Parse()."""

    def StartElementHandler(self, name, attrs):
        # Always fail, carrying the element name so the test can tell which
        # element triggered the error.
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as cm:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The error must come from the outermost element <a>, the first start
        # tag in the document.
        self.assertEqual(cm.exception.args[0], 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % cm.exception.args[0])
Fred Drake | d7ea55b | 2004-08-13 03:09:07 +0000 | [diff] [blame] | 420 | |
Dave Cole | 3203efb | 2004-08-26 00:37:31 +0000 | [diff] [blame] | 421 | |
| 422 | # Test Current* members: |
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex / CurrentLineNumber / CurrentColumnNumber.

    Each start/end element callback compares the parser's current position
    with the next entry of ``self.expected_list``; entries are tuples of
    (event, byte index, line number, column number).
    """

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the current parser position with the next expected entry.

        event -- 's' for a start-element callback, 'e' for an end-element
                 callback.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # <b> is indented by one space and <c/> by two; the expected byte
        # indexes and column numbers above depend on that exact layout.
        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # check_pos() only guards against surplus events; make sure none of
        # the expected ones went missing either.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
Fred Drake | 6ffe499 | 2006-07-01 16:28:20 +0000 | [diff] [blame] | 452 | |
| 453 | |
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        # 1025 characters of text in a document that declares the 'iso8859'
        # encoding.
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Assert the handler's own exception type: with a bare Exception the
        # test would also pass if Parse() failed for an unrelated reason.
        self.assertRaises(SpecificException, parser.Parse, xml)
Fred Drake | 6ffe499 | 2006-07-01 16:28:20 +0000 | [diff] [blame] | 472 | |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 473 | class ChardataBufferTest(unittest.TestCase): |
| 474 | """ |
| 475 | test setting of chardata buffer size |
| 476 | """ |
| 477 | |
| 478 | def test_1025_bytes(self): |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 479 | self.assertEqual(self.small_buffer_test(1025), 2) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 480 | |
| 481 | def test_1000_bytes(self): |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 482 | self.assertEqual(self.small_buffer_test(1000), 1) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 483 | |
| 484 | def test_wrong_size(self): |
| 485 | parser = expat.ParserCreate() |
| 486 | parser.buffer_text = 1 |
Serhiy Storchaka | f3bdc10 | 2015-09-07 22:42:12 +0300 | [diff] [blame] | 487 | with self.assertRaises(ValueError): |
| 488 | parser.buffer_size = -1 |
| 489 | with self.assertRaises(ValueError): |
| 490 | parser.buffer_size = 0 |
| 491 | with self.assertRaises(TypeError): |
| 492 | parser.buffer_size = 512.0 |
| 493 | with self.assertRaises(TypeError): |
| 494 | parser.buffer_size = sys.maxint+1 |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 495 | |
| 496 | def test_unchanged_size(self): |
| 497 | xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)) |
| 498 | xml2 = 'a'*512 + '</s>' |
| 499 | parser = expat.ParserCreate() |
| 500 | parser.CharacterDataHandler = self.counting_handler |
| 501 | parser.buffer_size = 512 |
| 502 | parser.buffer_text = 1 |
| 503 | |
| 504 | # Feed 512 bytes of character data: the handler should be called |
| 505 | # once. |
| 506 | self.n = 0 |
| 507 | parser.Parse(xml1) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 508 | self.assertEqual(self.n, 1) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 509 | |
| 510 | # Reassign to buffer_size, but assign the same size. |
| 511 | parser.buffer_size = parser.buffer_size |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 512 | self.assertEqual(self.n, 1) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 513 | |
| 514 | # Try parsing rest of the document |
| 515 | parser.Parse(xml2) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 516 | self.assertEqual(self.n, 2) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 517 | |
| 518 | |
| 519 | def test_disabling_buffer(self): |
| 520 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512) |
| 521 | xml2 = ('b' * 1024) |
| 522 | xml3 = "%s</a>" % ('c' * 1024) |
| 523 | parser = expat.ParserCreate() |
| 524 | parser.CharacterDataHandler = self.counting_handler |
| 525 | parser.buffer_text = 1 |
| 526 | parser.buffer_size = 1024 |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 527 | self.assertEqual(parser.buffer_size, 1024) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 528 | |
| 529 | # Parse one chunk of XML |
| 530 | self.n = 0 |
| 531 | parser.Parse(xml1, 0) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 532 | self.assertEqual(parser.buffer_size, 1024) |
| 533 | self.assertEqual(self.n, 1) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 534 | |
| 535 | # Turn off buffering and parse the next chunk. |
| 536 | parser.buffer_text = 0 |
| 537 | self.assertFalse(parser.buffer_text) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 538 | self.assertEqual(parser.buffer_size, 1024) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 539 | for i in range(10): |
| 540 | parser.Parse(xml2, 0) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 541 | self.assertEqual(self.n, 11) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 542 | |
| 543 | parser.buffer_text = 1 |
| 544 | self.assertTrue(parser.buffer_text) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 545 | self.assertEqual(parser.buffer_size, 1024) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 546 | parser.Parse(xml3, 1) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 547 | self.assertEqual(self.n, 12) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 548 | |
| 549 | |
| 550 | |
| 551 | def make_document(self, bytes): |
| 552 | return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>') |
| 553 | |
| 554 | def counting_handler(self, text): |
| 555 | self.n += 1 |
| 556 | |
| 557 | def small_buffer_test(self, buffer_len): |
| 558 | xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len) |
| 559 | parser = expat.ParserCreate() |
| 560 | parser.CharacterDataHandler = self.counting_handler |
| 561 | parser.buffer_size = 1024 |
| 562 | parser.buffer_text = 1 |
| 563 | |
| 564 | self.n = 0 |
| 565 | parser.Parse(xml) |
| 566 | return self.n |
| 567 | |
| 568 | def test_change_size_1(self): |
| 569 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024) |
| 570 | xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) |
| 571 | parser = expat.ParserCreate() |
| 572 | parser.CharacterDataHandler = self.counting_handler |
| 573 | parser.buffer_text = 1 |
| 574 | parser.buffer_size = 1024 |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 575 | self.assertEqual(parser.buffer_size, 1024) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 576 | |
| 577 | self.n = 0 |
| 578 | parser.Parse(xml1, 0) |
| 579 | parser.buffer_size *= 2 |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 580 | self.assertEqual(parser.buffer_size, 2048) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 581 | parser.Parse(xml2, 1) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 582 | self.assertEqual(self.n, 2) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 583 | |
| 584 | def test_change_size_2(self): |
| 585 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023) |
| 586 | xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) |
| 587 | parser = expat.ParserCreate() |
| 588 | parser.CharacterDataHandler = self.counting_handler |
| 589 | parser.buffer_text = 1 |
| 590 | parser.buffer_size = 2048 |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 591 | self.assertEqual(parser.buffer_size, 2048) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 592 | |
| 593 | self.n=0 |
| 594 | parser.Parse(xml1, 0) |
Ezio Melotti | dde5b94 | 2010-02-03 05:37:26 +0000 | [diff] [blame] | 595 | parser.buffer_size //= 2 |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 596 | self.assertEqual(parser.buffer_size, 1024) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 597 | parser.Parse(xml2, 1) |
Ezio Melotti | 2623a37 | 2010-11-21 13:34:58 +0000 | [diff] [blame] | 598 | self.assertEqual(self.n, 4) |
Andrew M. Kuchling | e0a49b6 | 2008-01-08 14:30:55 +0000 | [diff] [blame] | 599 | |
class MalformedInputText(unittest.TestCase):
    """
    Check the ExpatError message reported for input that is malformed at
    the byte level.
    """

    def test1(self):
        """
        A document consisting of a NUL byte and a CRLF pair is rejected
        with an "unclosed token" error.
        """
        # Byte-string literal: the input is raw bytes, not text (the b
        # prefix changes nothing on Python 2).
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        # assertRaises reports a missing exception by name, which a bare
        # self.fail() inside try/except did not.
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        self.assertEqual(str(caught.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        """
        A non-ASCII character inside the XML declaration is rejected as a
        malformed declaration on line 1.
        """
        # \xc2\x85 is the UTF-8 encoding of U+0085.
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(xml, True)
        # Only the error text and the line are pinned; the column must be
        # a number but its value is not checked.
        # NOTE(review): the reported column appears to differ between Expat
        # releases (this test used to require 14) -- confirm.
        message = str(caught.exception)
        prefix = 'XML declaration not well-formed: line 1, column '
        self.assertTrue(message.startswith(prefix), message)
        self.assertTrue(message[len(prefix):].isdigit(), message)
Fred Drake | 6ffe499 | 2006-07-01 16:28:20 +0000 | [diff] [blame] | 618 | |
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """

    def test_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled and a document that has no external
        entity reference, ExternalEntityRefHandler is invoked with None as
        both the public and the system id.  Calling UseForeignDTD() without
        an argument has to act like UseForeignDTD(True).
        """
        seen_ids = []

        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        # First pass: explicit True.  Second pass: no argument at all.
        for dtd_args in ((True,), ()):
            del seen_ids[:]

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*dtd_args)
            parser.SetParamEntityParsing(
                expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = record_ids
            parser.Parse("<?xml version='1.0'?><element/>")
            self.assertEqual(seen_ids, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled and a document that does carry an
        external entity reference, ExternalEntityRefHandler receives the
        public and system ids written in the document.
        """
        seen_ids = []

        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = (
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = record_ids
        parser.Parse(document)
        self.assertEqual(seen_ids, [("bar", "baz")])
| 669 | |
| 670 | |
def test_main():
    """
    Hand every test case class of this module to run_unittest.
    """
    test_classes = (
        SetAttributeTest,
        ParseTest,
        NamespaceSeparatorTest,
        InterningTest,
        BufferTextTest,
        HandlerExceptionTest,
        PositionTest,
        sf1296433Test,
        ChardataBufferTest,
        MalformedInputText,
        ForeignDTDTests,
    )
    run_unittest(*test_classes)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()