Issue #2175: SAX parsers now support the character stream of an InputSource object.
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index c8d5b21..813dc2e 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -185,12 +185,24 @@
def make_byte_stream(self):
return BytesIO(b"This is a byte stream.")
+ def make_character_stream(self):
+ return StringIO("This is a character stream.")
+
def checkContent(self, stream, content):
self.assertIsNotNone(stream)
self.assertEqual(stream.read(), content)
stream.close()
+ def test_character_stream(self):
+ # If the source is an InputSource with a character stream, use it.
+ src = InputSource(self.file)
+ src.setCharacterStream(self.make_character_stream())
+ prep = prepare_input_source(src)
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
def test_byte_stream(self):
# If the source is an InputSource that does not have a character
# stream but does have a byte stream, use the byte stream.
@@ -225,6 +237,14 @@
self.checkContent(prep.getByteStream(),
b"This is a byte stream.")
+ def test_text_file(self):
+ # If the source is a text file-like object, use it as a character
+ # stream.
+ prep = prepare_input_source(self.make_character_stream())
+ self.assertIsNone(prep.getByteStream())
+ self.checkContent(prep.getCharacterStream(),
+ "This is a character stream.")
+
# ===== XMLGenerator
@@ -904,6 +924,19 @@
self.assertEqual(result.getvalue(), xml_test_out)
+ def test_expat_inpsource_character_stream(self):
+ parser = create_parser()
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
+ inpsrc.setCharacterStream(f)
+ parser.parse(inpsrc)
+
+ self.assertEqual(result.getvalue(), xml_test_out)
+
# ===== IncrementalParser support
def test_expat_incremental(self):
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index a227cda..65ac7e3 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -219,9 +219,14 @@
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
self._parser = None
- bs = self._source.getByteStream()
- if bs is not None:
- bs.close()
+ try:
+ file = self._source.getCharacterStream()
+ if file is not None:
+ file.close()
+ finally:
+ file = self._source.getByteStream()
+ if file is not None:
+ file.close()
def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
index 1d3d0ec..a69c7f7 100644
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -345,11 +345,14 @@
elif hasattr(source, "read"):
f = source
source = xmlreader.InputSource()
- source.setByteStream(f)
+ if isinstance(f.read(0), str):
+ source.setCharacterStream(f)
+ else:
+ source.setByteStream(f)
if hasattr(f, "name") and isinstance(f.name, str):
source.setSystemId(f.name)
- if source.getByteStream() is None:
+ if source.getCharacterStream() is None and source.getByteStream() is None:
sysid = source.getSystemId()
basehead = os.path.dirname(os.path.normpath(base))
sysidfilename = os.path.join(basehead, sysid)
diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py
index 7ef497f..716f228 100644
--- a/Lib/xml/sax/xmlreader.py
+++ b/Lib/xml/sax/xmlreader.py
@@ -117,7 +117,9 @@
source = saxutils.prepare_input_source(source)
self.prepareParser(source)
- file = source.getByteStream()
+ file = source.getCharacterStream()
+ if file is None:
+ file = source.getByteStream()
buffer = file.read(self._bufsize)
while buffer:
self.feed(buffer)