# Source blob: 6dc51e4371d0f630300fa9487f88015e9e28793b
import io
import unittest
import xml.sax

from xml.sax.xmlreader import AttributesImpl
from xml.sax.handler import feature_external_ges
from xml.dom import pulldom

from test.support import findfile


# Path of the large sample document used by the stream-parsing test;
# looked up via test.support.findfile in the "xmltestdata" subdirectory.
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag.  The tests below step through it event by event, so the
# exact line breaks between elements matter (each one is a CHARACTERS event).
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""


class PullDOMTestCase(unittest.TestCase):
    """Tests for the event streams returned by pulldom.parse()/parseString()."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # pulldom opened the file itself, so we are responsible for closing it.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here (pulldom.COMMENT), but it
        # is swallowed; see test_comment.
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very same node as the start event.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained; see test_end_document.

    def test_expandItem(self):
        """Ensure expandItem works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() must drop the references to the parser and the stream.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the next() call can raise StopIteration, so keep the try
        # body down to that single statement.
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_external_ges_default(self):
        """The SAX parser behind parseString() reports external general
        entities as disabled."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        # Compared by equality on purpose: the feature value may be reported
        # as 0 rather than the False singleton.
        self.assertEqual(ges, False)


class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(pd)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(pd, False)

    def _assert_comment_then_pi(self, pd):
        """Consume two events from *pd* and check they are the mock parser's
        comment followed by its processing instruction."""
        evt, node = next(pd)
        self.assertEqual(pulldom.COMMENT, evt)
        self.assertEqual("a comment", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _test_thorough(self, pd, before_root=True):
        """Test some of the hard-to-reach parts of the parser, using a mock
        parser.

        pd is the event stream to step through.  before_root says whether
        the stream is expected to report a comment and a processing
        instruction ahead of the root element.
        """

        evt, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._assert_comment_then_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)

        # The same comment/PI pair is emitted again inside the root element.
        self._assert_comment_then_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("text", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, evt)


class SAXExerciser:
    """A fake sax parser that calls some of the harder-to-reach sax methods to
    ensure it emits the correct events."""

    def setContentHandler(self, handler):
        """Remember *handler*; parse() sends every event to it."""
        self._handler = handler

    def parse(self, _):
        """Replay a fixed sequence of SAX events on the content handler.

        The argument (normally the input source) is ignored.
        """
        h = self._handler
        h.startDocument()

        # The next two items ensure that items preceding the first
        # start_element are properly stored and emitted:
        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("html", AttributesImpl({}))

        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Accept and ignore any property/feature configuration calls.
    setProperty = stub
    setFeature = stub


class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it."""

    def parse(self, _):
        """Replay the event sequence, starting directly with the root element.

        The argument (normally the input source) is ignored.
        """
        h = self._handler
        h.startDocument()
        h.startElement("html", AttributesImpl({}))
        h.comment("a comment")
        h.processingInstruction("target", "data")
        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()


class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a SAX2DOM instance as the parser's content handler."""
        self.pulldom = pulldom.SAX2DOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)


class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM, driven both through a stream and directly."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the failure message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            else:
                # Without this, the assertion below would silently inspect
                # whichever node happened to come last.
                self.fail("No \"html\" start element was reported")
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
        doc.unlink()


# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()