blob: 4a1bad3442b25b8e45c6a7bbdcec2d1ee4973064 [file] [log] [blame]
Benjamin Peterson863a0c32011-03-02 23:40:36 +00001import io
2import unittest
Benjamin Peterson863a0c32011-03-02 23:40:36 +00003import xml.sax
4
5from xml.sax.xmlreader import AttributesImpl
Christian Heimes17b1d5d2018-09-23 09:50:25 +02006from xml.sax.handler import feature_external_ges
Benjamin Peterson863a0c32011-03-02 23:40:36 +00007from xml.dom import pulldom
8
Zachary Ware38c707e2015-04-13 15:00:43 -05009from test.support import findfile
Benjamin Peterson863a0c32011-03-02 23:40:36 +000010
11
# Path of the shared XML fixture, looked up in the "xmltestdata" subdirectory.
tstfile = findfile("test.xml", subdir="xmltestdata")
13
# Compact XML document shared by the tests in this file.  It contains a
# default namespace, a prefixed namespace, a comment, a self-closing tag,
# and both prefixed and unprefixed attributes:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
23
24
class PullDOMTestCase(unittest.TestCase):
    """Tests for pulldom.parse(), pulldom.parseString() and DOMEventStream."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # pulldom opened the file itself, so make sure it gets closed.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here!
        # self.assertEqual(pulldom.COMMENT, evt)
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very same node object that the
        # start event produced.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained:
        #evt, node = next(items)
        #self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the stream's parser and input.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")

    def test_getitem_deprecation(self):
        """Indexing a DOMEventStream must emit a DeprecationWarning."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        with self.assertWarnsRegex(DeprecationWarning,
                                   r'Use iterator protocol instead'):
            # This should have returned 'END_ELEMENT'.
            self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)

    def test_external_ges_default(self):
        """The external general entities feature must be off by default."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        self.assertEqual(ges, False)
175
Benjamin Peterson863a0c32011-03-02 23:40:36 +0000176
class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _check_comment_and_pi(self, pd):
        """Consume one comment event followed by one PI event from *pd*."""
        kind, node = next(pd)
        self.assertEqual(pulldom.COMMENT, kind)
        self.assertEqual("a comment", node.data)
        kind, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, kind)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _check_element(self, pd, expected_event, expected_tag):
        """Consume one element event from *pd* and check its kind and tag."""
        kind, node = next(pd)
        self.assertEqual(expected_event, kind)
        self.assertEqual(expected_tag, node.tagName)

    def _test_thorough(self, pd, before_root=True):
        """Walk the event stream produced by one of the mock parsers.

        *pd* is the event stream to consume.  When *before_root* is true, a
        comment and a processing instruction are expected ahead of the root
        element as well as inside it.
        """
        kind, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, kind)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._check_comment_and_pi(pd)

        self._check_element(pd, pulldom.START_ELEMENT, "html")
        self._check_comment_and_pi(pd)
        self._check_element(pd, pulldom.START_ELEMENT, "p")

        kind, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, kind)
        self.assertEqual("text", node.data)

        self._check_element(pd, pulldom.END_ELEMENT, "p")
        self._check_element(pd, pulldom.END_ELEMENT, "html")

        kind, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, kind)
240
241
class SAXExerciser:
    """A fake sax parser that calls some of the harder-to-reach sax methods to
    ensure it emits the correct events"""

    def setContentHandler(self, handler):
        """Remember *handler*; parse() sends every event to it."""
        self._handler = handler

    def parse(self, _):
        """Replay a fixed event script on the content handler.

        The argument (the input source of a real parser) is ignored.
        """
        handler = self._handler
        handler.startDocument()

        # The next two items ensure that items preceding the first
        # start_element are properly stored and emitted:
        handler.comment("a comment")
        handler.processingInstruction("target", "data")

        handler.startElement("html", AttributesImpl({}))

        handler.comment("a comment")
        handler.processingInstruction("target", "data")

        handler.startElement("p", AttributesImpl({"class": "paraclass"}))
        handler.characters("text")
        handler.endElement("p")
        handler.endElement("html")
        handler.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Configuration calls are accepted and ignored.
    setProperty = stub
    setFeature = stub
273 setFeature = stub
274
275
class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Replay the reduced event script on the content handler."""
        script = (
            ("startDocument",),
            ("startElement", "html", AttributesImpl({})),
            ("comment", "a comment"),
            ("processingInstruction", "target", "data"),
            ("startElement", "p", AttributesImpl({"class": "paraclass"})),
            ("characters", "text"),
            ("endElement", "p"),
            ("endElement", "html"),
            ("endDocument",),
        )
        sink = self._handler
        # Look each method up right before calling it, one event at a time.
        for method_name, *arguments in script:
            getattr(sink, method_name)(*arguments)
291
292
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a fresh SAX2DOM instance as the parser's content handler."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        # This content handler relies on namespace support
        namespaces = xml.sax.handler.feature_namespaces
        self.parser.setFeature(namespaces, 1)
        self.parser.setContentHandler(builder)
301
302
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the SAX2DOM content handler."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            else:
                # Without this guard a missing root would surface as a
                # misleading failure (or NameError) on the check below.
                self.fail("No \"html\" element detected in SMALL_SAMPLE!")
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        # Unlink the document even when one of the assertions below fails.
        self.addCleanup(doc.unlink)
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
353
354
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()