blob: a6f09949ffd8ba6a98946e3ffd314a6dd88d830e [file] [log] [blame]
'''
Tests for fileinput module.
Nick Mathewson
'''
5
Collin Winterbf618562007-04-07 04:40:43 +00006import unittest
7from test.test_support import verbose, TESTFN, run_unittest
Serhiy Storchaka69b7f812016-03-08 18:35:45 +02008from test.test_support import unlink as safe_unlink, check_warnings
Christian Heimesc5f05e42008-02-23 17:40:11 +00009import sys, re
Tim Peters3230d5c2001-07-11 22:21:17 +000010from StringIO import StringIO
Georg Brandlc98eeed2006-02-19 14:57:47 +000011from fileinput import FileInput, hook_encoded
Tim Peters3230d5c2001-07-11 22:21:17 +000012
# The fileinput module has 2 interfaces: the FileInput class, which does
# all the work, and a few functions (input, etc.) that use a global _state
# variable.  We only test the FileInput class, since the other functions
# only provide a thin facade over FileInput.
17
def writeTmp(i, lines, mode='w'):  # opening in text mode is the default
    """Write *lines* to temp file number *i* and return the file's name.

    The file name is ``TESTFN + str(i)``.  *lines* is a list of strings
    handed to ``writelines()`` unchanged, so each entry must carry its own
    line terminator.  *mode* is passed straight to ``open()``; callers use
    ``"wb"`` when the exact bytes (e.g. ``\\r\\n``) matter.
    """
    name = TESTFN + str(i)
    # The with-block guarantees the handle is closed even when writelines()
    # raises, so a failed write cannot leave the temp file locked.
    with open(name, mode) as f:
        f.writelines(lines)
    return name
26
def remove_tempfiles(*names):
    """Remove each temp file in *names* using safe_unlink.

    safe_unlink is test_support.unlink under the alias given at import time.
    """
    for path in names:
        safe_unlink(path)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +000030
class LineReader(object):
    """Minimal in-memory file stand-in that records every line it serves.

    ``openhook`` has the signature FileInput expects from an open hook, but
    it treats the *filename* argument as the file's content.  Each string
    returned by ``readline()`` (including the empty end-of-data marker) is
    logged, so tests can check exactly how far ahead a consumer has read.
    """

    def __init__(self):
        self._linesread = []

    @property
    def linesread(self):
        """Return the lines served since the last access and reset the log."""
        served, self._linesread = self._linesread, []
        return served

    def openhook(self, filename, mode):
        """Start serving the lines of *filename* (used as content).

        *mode* is accepted for signature compatibility and ignored.
        Returns self, which then acts as the opened file object.
        """
        self.it = iter(filename.splitlines(True))
        return self

    def readline(self, size=None):
        """Return the next line, or '' when the data is exhausted.

        *size* is accepted for signature compatibility and ignored.  Every
        returned value, including the final '', is appended to the log.
        """
        line = next(self.it, '')
        self._linesread.append(line)
        return line

    def readlines(self, hint=-1):
        """Return a list of the remaining lines, optionally bounded by *hint*.

        With a positive *hint*, reading stops as soon as the total length of
        the collected lines reaches it.  A *hint* of None, zero or a negative
        number means "no limit", matching the file-object convention;
        previously the default of -1 made this return after a single line.
        """
        limited = hint is not None and hint > 0
        lines = []
        size = 0
        while True:
            line = self.readline()
            if not line:
                return lines
            lines.append(line)
            size += len(line)
            if limited and size >= hint:
                return lines

    def close(self):
        """Do nothing; there is no underlying resource to release."""
        pass
66
class BufferSizesTests(unittest.TestCase):
    """Run one FileInput scenario with the default and a tiny bufsize."""

    def test_buffer_sizes(self):
        # First, run the tests with default and teeny buffer size.
        for round, bs in (0, 0), (1, 30):
            # Names are collected as the files are created, so the cleanup
            # below never refers to an unbound name if writeTmp() fails
            # part-way (which would mask the real error with a NameError).
            names = []
            try:
                for num, count in (1, 15), (2, 10), (3, 5), (4, 1):
                    names.append(writeTmp(
                        num,
                        ["Line %s of file %s\n" % (i + 1, num)
                         for i in range(count)]))
                t1, t2, t3, t4 = names
                self.buffer_size_test(t1, t2, t3, t4, bs, round)
            finally:
                remove_tempfiles(*names)

    def buffer_size_test(self, t1, t2, t3, t4, bs=0, round=0):
        """Exercise FileInput over four temp files with buffer size *bs*.

        t1..t4 name files holding 15, 10, 5 and 1 lines respectively
        (31 in total).  *round* only offsets the step numbers printed in
        verbose mode.  The steps are: plain iteration, status accessors,
        nextfile(), reading stdin via '-', accessors before any read, and
        in-place rewriting (upper-casing every line) followed by a re-read.
        """
        files = (t1, t2, t3, t4)
        pat = re.compile(r'LINE (\d+) OF FILE (\d+)')

        start = 1 + round*6
        if verbose:
            print('%s. Simple iteration (bs=%s)' % (start+0, bs))
        fi = FileInput(files=files, bufsize=bs)
        lines = list(fi)
        fi.close()
        self.assertEqual(len(lines), 31)
        self.assertEqual(lines[4], 'Line 5 of file 1\n')
        self.assertEqual(lines[30], 'Line 1 of file 4\n')
        self.assertEqual(fi.lineno(), 31)
        self.assertEqual(fi.filename(), t4)

        if verbose:
            print('%s. Status variables (bs=%s)' % (start+1, bs))
        fi = FileInput(files=files, bufsize=bs)
        # Close in a finally: a failed assertion must not leave t2/t3 open
        # while the caller is about to unlink them.
        try:
            s = "x"
            while s and s != 'Line 6 of file 2\n':
                s = fi.readline()
            self.assertEqual(fi.filename(), t2)
            self.assertEqual(fi.lineno(), 21)
            self.assertEqual(fi.filelineno(), 6)
            self.assertFalse(fi.isfirstline())
            self.assertFalse(fi.isstdin())

            if verbose:
                print('%s. Nextfile (bs=%s)' % (start+2, bs))
            fi.nextfile()
            self.assertEqual(fi.readline(), 'Line 1 of file 3\n')
            self.assertEqual(fi.lineno(), 22)
        finally:
            fi.close()

        if verbose:
            print('%s. Stdin (bs=%s)' % (start+3, bs))
        fi = FileInput(files=files + ('-',), bufsize=bs)
        savestdin = sys.stdin
        try:
            sys.stdin = StringIO("Line 1 of stdin\nLine 2 of stdin\n")
            lines = list(fi)
            self.assertEqual(len(lines), 33)
            self.assertEqual(lines[32], 'Line 2 of stdin\n')
            self.assertEqual(fi.filename(), '<stdin>')
            fi.nextfile()
        finally:
            # Restore the real stdin first so it is back in place even if
            # closing the FileInput were to fail.
            sys.stdin = savestdin
            fi.close()

        if verbose:
            print('%s. Boundary conditions (bs=%s)' % (start+4, bs))
        fi = FileInput(files=files, bufsize=bs)
        try:
            # Nothing has been read yet, so there is no current file.
            self.assertEqual(fi.lineno(), 0)
            self.assertIsNone(fi.filename())
            fi.nextfile()
            self.assertEqual(fi.lineno(), 0)
            self.assertIsNone(fi.filename())
        finally:
            fi.close()

        if verbose:
            print('%s. Inplace (bs=%s)' % (start+5, bs))
        savestdout = sys.stdout
        try:
            fi = FileInput(files=files, inplace=1, bufsize=bs)
            try:
                for line in fi:
                    # Printed output goes to sys.stdout, which is what the
                    # inplace run is expected to capture back into the file
                    # (the re-read below checks the upper-cased content).
                    print(line[:-1].upper())
            finally:
                fi.close()
        finally:
            sys.stdout = savestdout

        # Re-read the files: every line must now be upper-case and its
        # embedded line number must match its position within the file.
        fi = FileInput(files=files, bufsize=bs)
        try:
            for line in fi:
                self.assertEqual(line[-1], '\n')
                m = pat.match(line[:-1])
                self.assertIsNotNone(m)
                self.assertEqual(int(m.group(1)), fi.filelineno())
        finally:
            fi.close()
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000156
class FileInputTests(unittest.TestCase):
    """Tests for individual FileInput behaviours.

    Temp files and FileInput objects are released through addCleanup(), so
    they are cleaned up even when an assertion fails and no cleanup code
    ever refers to a name that was not bound yet.
    """

    def _tmp(self, i, lines, mode='w'):
        # Create temp file number i and schedule its removal.
        name = writeTmp(i, lines, mode)
        self.addCleanup(safe_unlink, name)
        return name

    def _open_input(self, **kwargs):
        # Build a FileInput and schedule close().  Cleanups run in reverse
        # order, so the input is closed before the files created earlier
        # in the same test are unlinked.
        fi = FileInput(**kwargs)
        self.addCleanup(fi.close)
        return fi

    def test_zero_byte_files(self):
        t1 = self._tmp(1, [""])
        t2 = self._tmp(2, [""])
        t3 = self._tmp(3, ["The only line there is.\n"])
        t4 = self._tmp(4, [""])
        fi = self._open_input(files=(t1, t2, t3, t4))

        # The two leading empty files are skipped without counting lines.
        line = fi.readline()
        self.assertEqual(line, 'The only line there is.\n')
        self.assertEqual(fi.lineno(), 1)
        self.assertEqual(fi.filelineno(), 1)
        self.assertEqual(fi.filename(), t3)

        # End of input: the trailing empty file is now the current one.
        line = fi.readline()
        self.assertFalse(line)
        self.assertEqual(fi.lineno(), 1)
        self.assertEqual(fi.filelineno(), 0)
        self.assertEqual(fi.filename(), t4)
        fi.close()

    def test_files_that_dont_end_with_newline(self):
        t1 = self._tmp(1, ["A\nB\nC"])
        t2 = self._tmp(2, ["D\nE\nF"])
        fi = self._open_input(files=(t1, t2))
        lines = list(fi)
        self.assertEqual(lines, ["A\n", "B\n", "C", "D\n", "E\n", "F"])
        self.assertEqual(fi.filelineno(), 3)
        self.assertEqual(fi.lineno(), 6)

    def test_unicode_filenames(self):
        t1 = self._tmp(1, ["A\nB"])
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            encoding = 'ascii'
        fi = self._open_input(files=unicode(t1, encoding))
        lines = list(fi)
        self.assertEqual(lines, ["A\n", "B"])

    def test_fileno(self):
        t1 = self._tmp(1, ["A\nB"])
        t2 = self._tmp(2, ["C\nD"])
        fi = self._open_input(files=(t1, t2))
        # fileno() is -1 whenever no file is currently open.
        self.assertEqual(fi.fileno(), -1)
        next(fi)
        self.assertNotEqual(fi.fileno(), -1)
        fi.nextfile()
        self.assertEqual(fi.fileno(), -1)
        list(fi)
        self.assertEqual(fi.fileno(), -1)

    def test_opening_mode(self):
        # invalid mode, should raise ValueError
        try:
            FileInput(mode="w")
        except ValueError:
            pass
        else:
            self.fail("FileInput should reject invalid mode argument")

        # try opening in universal newline mode
        t1 = self._tmp(1, ["A\nB\r\nC\rD"], mode="wb")
        fi = self._open_input(files=t1, mode="U")
        lines = list(fi)
        self.assertEqual(lines, ["A\n", "B\n", "C\n", "D"])

    def test_file_opening_hook(self):
        # cannot use openhook and inplace mode
        try:
            FileInput(inplace=1, openhook=lambda f, m: None)
        except ValueError:
            pass
        else:
            self.fail("FileInput should raise if both inplace "
                      "and openhook arguments are given")

        try:
            FileInput(openhook=1)
        except ValueError:
            pass
        else:
            self.fail("FileInput should check openhook for being callable")

        # UTF-7 is a convenient, seldom used encoding
        t1 = self._tmp(1, ['+AEE-\n+AEI-'], mode="wb")
        fi = self._open_input(files=t1, openhook=hook_encoded("utf-7"))
        lines = list(fi)
        self.assertEqual(lines, [u'A\n', u'B'])

    def test_readline(self):
        with open(TESTFN, 'wb') as f:
            f.write('A\nB\r\nC\r')
            # Fill TextIOWrapper buffer.
            f.write('123456789\n' * 1000)
            # Issue #20501: readline() shouldn't read whole file.
            f.write('\x80')
        self.addCleanup(safe_unlink, TESTFN)

        fi = self._open_input(files=TESTFN, openhook=hook_encoded('ascii'))
        # The most likely failure is a UnicodeDecodeError due to the entire
        # file being read when it shouldn't have been.
        self.assertEqual(fi.readline(), u'A\n')
        self.assertEqual(fi.readline(), u'B\r\n')
        self.assertEqual(fi.readline(), u'C\r')
        with self.assertRaises(UnicodeDecodeError):
            # Read to the end of file.
            list(fi)
        fi.close()

    def test_readline_buffering(self):
        # The "file names" here are really file contents: LineReader's
        # openhook serves the lines of whatever string it is given and
        # logs each line handed out, so linesread shows read-ahead.
        src = LineReader()
        fi = FileInput(files=['line1\nline2', 'line3\n'],
                       openhook=src.openhook)
        self.addCleanup(fi.close)
        self.assertEqual(src.linesread, [])
        self.assertEqual(fi.readline(), 'line1\n')
        self.assertEqual(src.linesread, ['line1\n'])
        self.assertEqual(fi.readline(), 'line2')
        self.assertEqual(src.linesread, ['line2'])
        # Crossing into the second source: the '' is the first source's
        # end-of-data marker, logged before 'line3\n' is served.
        self.assertEqual(fi.readline(), 'line3\n')
        self.assertEqual(src.linesread, ['', 'line3\n'])
        self.assertEqual(fi.readline(), '')
        self.assertEqual(src.linesread, [''])
        # Once all input is exhausted the source is not consulted again.
        self.assertEqual(fi.readline(), '')
        self.assertEqual(src.linesread, [])
        fi.close()

    def test_iteration_buffering(self):
        # Same scenario as test_readline_buffering, driven through next().
        src = LineReader()
        fi = FileInput(files=['line1\nline2', 'line3\n'],
                       openhook=src.openhook)
        self.addCleanup(fi.close)
        self.assertEqual(src.linesread, [])
        self.assertEqual(next(fi), 'line1\n')
        self.assertEqual(src.linesread, ['line1\n'])
        self.assertEqual(next(fi), 'line2')
        self.assertEqual(src.linesread, ['line2'])
        self.assertEqual(next(fi), 'line3\n')
        self.assertEqual(src.linesread, ['', 'line3\n'])
        self.assertRaises(StopIteration, next, fi)
        self.assertEqual(src.linesread, [''])
        self.assertRaises(StopIteration, next, fi)
        self.assertEqual(src.linesread, [])
        fi.close()
309
class Test_hook_encoded(unittest.TestCase):
    """Checks for the open hook produced by fileinput.hook_encoded()."""

    def test_modes(self):
        # UTF-7 is a convenient, seldom used encoding
        with open(TESTFN, 'wb') as f:
            f.write('A\nB\r\nC\rD+IKw-')
        self.addCleanup(safe_unlink, TESTFN)

        # Every mode is expected to yield the same decoded lines, with the
        # original line endings left as written.
        expected = [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']
        for mode in ('r', 'rU', 'U', 'rb'):
            reader = FileInput(files=TESTFN, mode=mode,
                               openhook=hook_encoded('utf-7'))
            decoded = list(reader)
            reader.close()
            self.assertEqual(decoded, expected)
330
def test_main():
    """Hand all test case classes of this module to run_unittest."""
    cases = (BufferSizesTests, FileInputTests, Test_hook_encoded)
    run_unittest(*cases)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000333
# Executed directly as a script: run the whole suite.
if __name__ == '__main__':
    test_main()