# Source blob: 3d89e3adda26ce646d3a05eced17aca727e6e779
import io
import unittest
import xml.sax
import xml.sax.handler

from xml.dom import pulldom
from xml.sax.xmlreader import AttributesImpl

from test.support import findfile


# Path of the sample XML document that test_parse() reads, both by filename
# and through an open file object.
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag.  The tests below assert on the exact sequence of events
# this document produces, so any edit here must be mirrored in them.
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""


class PullDOMTestCase(unittest.TestCase):
    """Tests for the event streams returned by pulldom.parse/parseString."""

    def test_parse(self):
        """Minimal test of pulldom.parse()."""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # parse() opened the file itself, so make sure it gets closed again.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here!
        # self.assertEqual(pulldom.COMMENT, evt)
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very same node object that the
        # start event delivered.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        # XXX No END_DOCUMENT item is ever obtained:
        #evt, node = next(items)
        #self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the stream's parser and input.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")


class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(pd)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(pd, before_root=False)

    def _test_thorough(self, pd, before_root=True):
        """Test some of the hard-to-reach parts of the parser, using a mock
        parser.

        *pd* is the event stream to pull from.  When *before_root* is true
        the stream must also report a comment and a processing instruction
        ahead of the root element.
        """

        evt, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            evt, node = next(pd)
            self.assertEqual(pulldom.COMMENT, evt)
            self.assertEqual("a comment", node.data)
            evt, node = next(pd)
            self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
            self.assertEqual("target", node.target)
            self.assertEqual("data", node.data)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)

        evt, node = next(pd)
        self.assertEqual(pulldom.COMMENT, evt)
        self.assertEqual("a comment", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("text", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, evt)


class SAXExerciser:
    """A fake sax parser that calls some of the harder-to-reach sax methods to
    ensure it emits the correct events"""

    def setContentHandler(self, handler):
        """Remember *handler*; parse() replays its scripted calls onto it."""
        self._handler = handler

    def parse(self, _):
        """Drive the content handler through a fixed sequence of SAX calls.

        The source argument is ignored: no real input is read.
        """
        h = self._handler
        h.startDocument()

        # The next two items ensure that items preceding the first
        # start_element are properly stored and emitted:
        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("html", AttributesImpl({}))

        h.comment("a comment")
        h.processingInstruction("target", "data")

        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""
        pass

    # Configuration calls made on the parser are accepted and ignored.
    setProperty = stub
    setFeature = stub


class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Replay the scripted SAX calls, starting directly with the root
        element.  The source argument is ignored."""
        h = self._handler
        h.startDocument()
        h.startElement("html", AttributesImpl({}))
        h.comment("a comment")
        h.processingInstruction("target", "data")
        h.startElement("p", AttributesImpl({"class": "paraclass"}))
        h.characters("text")
        h.endElement("p")
        h.endElement("html")
        h.endDocument()


class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a fresh SAX2DOM as the parser's content handler."""
        self.pulldom = pulldom.SAX2DOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)


class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the tree that pulldom.SAX2DOM builds from SAX events."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, reporting *testname* on failure."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            else:
                # Without this, the assertion below would silently inspect
                # whichever node happened to be reported last.
                self.fail("No \"html\" start element was reported")
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        # Registered as a cleanup so the document is unlinked even when one
        # of the assertions below fails.
        self.addCleanup(doc.unlink)
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links among the root's children and inside <subelm>:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()