bpo-43712: fileinput: Add encoding parameter (GH-25272)
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
index 0c31f93..6218c4f 100644
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -3,7 +3,7 @@
Typical use is:
import fileinput
- for line in fileinput.input():
+ for line in fileinput.input(encoding="utf-8"):
process(line)
This iterates over the lines of all files listed in sys.argv[1:],
@@ -63,15 +63,9 @@
deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.
-
-XXX Possible additions:
-
-- optional getopt argument processing
-- isatty()
-- read(), read(size), even readlines()
-
"""
+import io
import sys, os
from types import GenericAlias
@@ -81,7 +75,8 @@
_state = None
-def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
+def input(files=None, inplace=False, backup="", *, mode="r", openhook=None,
+ encoding=None, errors=None):
"""Return an instance of the FileInput class, which can be iterated.
The parameters are passed to the constructor of the FileInput class.
@@ -91,7 +86,8 @@ def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
global _state
if _state and _state._file:
raise RuntimeError("input() already active")
- _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook)
+ _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook,
+ encoding=encoding, errors=errors)
return _state
def close():
@@ -186,7 +182,7 @@ class FileInput:
"""
def __init__(self, files=None, inplace=False, backup="", *,
- mode="r", openhook=None):
+ mode="r", openhook=None, encoding=None, errors=None):
if isinstance(files, str):
files = (files,)
elif isinstance(files, os.PathLike):
@@ -209,6 +205,16 @@ def __init__(self, files=None, inplace=False, backup="", *,
self._file = None
self._isstdin = False
self._backupfilename = None
+ self._encoding = encoding
+ self._errors = errors
+
+ # We cannot use io.text_encoding() here because old openhooks don't
+ # take an encoding parameter.
+ if "b" not in mode and encoding is None and sys.flags.warn_default_encoding:
+ import warnings
+ warnings.warn("'encoding' argument not specified.",
+ EncodingWarning, 2)
+
# restrict mode argument to reading modes
if mode not in ('r', 'rU', 'U', 'rb'):
raise ValueError("FileInput opening mode must be one of "
@@ -362,9 +368,20 @@ def _readline(self):
else:
# This may raise OSError
if self._openhook:
- self._file = self._openhook(self._filename, self._mode)
+ # Custom hooks written before Python 3.10 don't accept an
+ # encoding argument.
+ if self._encoding is None:
+ self._file = self._openhook(self._filename, self._mode)
+ else:
+ self._file = self._openhook(
+ self._filename, self._mode, encoding=self._encoding, errors=self._errors)
else:
- self._file = open(self._filename, self._mode)
+ # EncodingWarning is emitted in __init__() already
+ if "b" not in self._mode:
+ encoding = self._encoding or "locale"
+ else:
+ encoding = None
+ self._file = open(self._filename, self._mode, encoding=encoding, errors=self._errors)
self._readline = self._file.readline # hide FileInput._readline
return self._readline()
@@ -395,16 +412,23 @@ def isstdin(self):
__class_getitem__ = classmethod(GenericAlias)
-def hook_compressed(filename, mode):
+def hook_compressed(filename, mode, *, encoding=None, errors=None):
+    """Open *filename*, decompressing it when it ends in '.gz' or '.bz2'.
+
+    The extension comparison is case-sensitive; any other extension is
+    passed to the built-in open().  When *mode* contains no "b", the
+    gzip/bz2 stream is wrapped in an io.TextIOWrapper using *encoding*
+    and *errors*.  When *encoding* is None it defaults to "locale" in
+    text mode only; in binary mode no default encoding is applied.
+    """
+    # EncodingWarning is emitted in FileInput() already.
+    # Only default the encoding in text mode: open() raises ValueError
+    # when an encoding is passed together with a binary mode.
+    if encoding is None and "b" not in mode:
+        encoding = "locale"
     ext = os.path.splitext(filename)[1]
     if ext == '.gz':
         import gzip
-        return gzip.open(filename, mode)
+        stream = gzip.open(filename, mode)
     elif ext == '.bz2':
         import bz2
-        return bz2.BZ2File(filename, mode)
+        stream = bz2.BZ2File(filename, mode)
     else:
-        return open(filename, mode)
+        return open(filename, mode, encoding=encoding, errors=errors)
+
+    # gzip and bz2 are binary mode by default.
+    if "b" not in mode:
+        stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors)
+    return stream
def hook_encoded(encoding, errors=None):
diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py
index d5edf74..d01d3962 100644
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -2,6 +2,7 @@
Tests for fileinput module.
Nick Mathewson
'''
+import io
import os
import sys
import re
@@ -238,7 +239,7 @@ def test_opening_mode(self):
# try opening in universal newline mode
t1 = self.writeTmp(b"A\nB\r\nC\rD", mode="wb")
with warnings_helper.check_warnings(('', DeprecationWarning)):
- fi = FileInput(files=t1, mode="U")
+ fi = FileInput(files=t1, mode="U", encoding="utf-8")
with warnings_helper.check_warnings(('', DeprecationWarning)):
lines = list(fi)
self.assertEqual(lines, ["A\n", "B\n", "C\n", "D"])
@@ -278,7 +279,7 @@ def test_file_opening_hook(self):
class CustomOpenHook:
def __init__(self):
self.invoked = False
- def __call__(self, *args):
+ def __call__(self, *args, **kargs):
self.invoked = True
return open(*args)
@@ -334,6 +335,14 @@ def test_inplace_binary_write_mode(self):
with open(temp_file, 'rb') as f:
self.assertEqual(f.read(), b'New line.')
+ def test_file_hook_backward_compatibility(self):
+ def old_hook(filename, mode):
+ return io.StringIO("I used to receive only filename and mode")
+ t = self.writeTmp("\n")
+ with FileInput([t], openhook=old_hook) as fi:
+ result = fi.readline()
+ self.assertEqual(result, "I used to receive only filename and mode")
+
def test_context_manager(self):
t1 = self.writeTmp("A\nB\nC")
t2 = self.writeTmp("D\nE\nF")
@@ -529,12 +538,14 @@ class MockFileInput:
"""A class that mocks out fileinput.FileInput for use during unit tests"""
def __init__(self, files=None, inplace=False, backup="", *,
- mode="r", openhook=None):
+ mode="r", openhook=None, encoding=None, errors=None):
self.files = files
self.inplace = inplace
self.backup = backup
self.mode = mode
self.openhook = openhook
+ self.encoding = encoding
+ self.errors = errors
self._file = None
self.invocation_counts = collections.defaultdict(lambda: 0)
self.return_values = {}
@@ -637,10 +648,11 @@ def do_test_call_input(self):
backup = object()
mode = object()
openhook = object()
+ encoding = object()
# call fileinput.input() with different values for each argument
result = fileinput.input(files=files, inplace=inplace, backup=backup,
- mode=mode, openhook=openhook)
+ mode=mode, openhook=openhook, encoding=encoding)
# ensure fileinput._state was set to the returned object
self.assertIs(result, fileinput._state, "fileinput._state")
@@ -863,11 +875,15 @@ def test_state_is_not_None(self):
self.assertIs(fileinput._state, instance)
class InvocationRecorder:
+
def __init__(self):
self.invocation_count = 0
+
def __call__(self, *args, **kwargs):
self.invocation_count += 1
self.last_invocation = (args, kwargs)
+ return io.BytesIO(b'some bytes')
+
class Test_hook_compressed(unittest.TestCase):
"""Unit tests for fileinput.hook_compressed()"""
@@ -886,33 +902,43 @@ def test_gz_ext_fake(self):
original_open = gzip.open
gzip.open = self.fake_open
try:
- result = fileinput.hook_compressed("test.gz", 3)
+ result = fileinput.hook_compressed("test.gz", "3")
finally:
gzip.open = original_open
self.assertEqual(self.fake_open.invocation_count, 1)
- self.assertEqual(self.fake_open.last_invocation, (("test.gz", 3), {}))
+ self.assertEqual(self.fake_open.last_invocation, (("test.gz", "3"), {}))
+
+ @unittest.skipUnless(gzip, "Requires gzip and zlib")
+ def test_gz_with_encoding_fake(self):
+ original_open = gzip.open
+ gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string')
+ try:
+ result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8")
+ finally:
+ gzip.open = original_open
+ self.assertEqual(list(result), ['Ex-binary string'])
@unittest.skipUnless(bz2, "Requires bz2")
def test_bz2_ext_fake(self):
original_open = bz2.BZ2File
bz2.BZ2File = self.fake_open
try:
- result = fileinput.hook_compressed("test.bz2", 4)
+ result = fileinput.hook_compressed("test.bz2", "4")
finally:
bz2.BZ2File = original_open
self.assertEqual(self.fake_open.invocation_count, 1)
- self.assertEqual(self.fake_open.last_invocation, (("test.bz2", 4), {}))
+ self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "4"), {}))
def test_blah_ext(self):
- self.do_test_use_builtin_open("abcd.blah", 5)
+ self.do_test_use_builtin_open("abcd.blah", "5")
def test_gz_ext_builtin(self):
- self.do_test_use_builtin_open("abcd.Gz", 6)
+ self.do_test_use_builtin_open("abcd.Gz", "6")
def test_bz2_ext_builtin(self):
- self.do_test_use_builtin_open("abcd.Bz2", 7)
+ self.do_test_use_builtin_open("abcd.Bz2", "7")
def do_test_use_builtin_open(self, filename, mode):
original_open = self.replace_builtin_open(self.fake_open)
@@ -923,7 +949,7 @@ def do_test_use_builtin_open(self, filename, mode):
self.assertEqual(self.fake_open.invocation_count, 1)
self.assertEqual(self.fake_open.last_invocation,
- ((filename, mode), {}))
+ ((filename, mode), {'encoding': 'locale', 'errors': None}))
@staticmethod
def replace_builtin_open(new_open_func):