blob: c15ad847bbc3fb0083157499548cee78268b8023 [file] [log] [blame]
'''
Tests for fileinput module.
Nick Mathewson
'''
5
Collin Winterbf618562007-04-07 04:40:43 +00006import unittest
7from test.test_support import verbose, TESTFN, run_unittest
8from test.test_support import unlink as safe_unlink
Christian Heimesc5f05e42008-02-23 17:40:11 +00009import sys, re
Tim Peters3230d5c2001-07-11 22:21:17 +000010from StringIO import StringIO
Georg Brandlc98eeed2006-02-19 14:57:47 +000011from fileinput import FileInput, hook_encoded
Tim Peters3230d5c2001-07-11 22:21:17 +000012
# The fileinput module has 2 interfaces: the FileInput class which does
# all the work, and a few functions (input, etc.) that use a global _state
# variable.  We only test the FileInput class, since the other functions
# only provide a thin facade over FileInput.
17
# Write lines (a list of lines) to temp file number i, and return the
# temp file's name.
def writeTmp(i, lines, mode='w'):  # opening in text mode is the default
    """Write *lines* to temp file number *i* and return the file's name.

    i     -- number appended to TESTFN to build the file name.
    lines -- list of strings handed to writelines() as-is (no line
             terminators are added).
    mode  -- mode passed to open(); defaults to text mode ('w').
    """
    name = TESTFN + str(i)
    # The with-statement closes the file even when writelines() raises,
    # so a failing write cannot leak an open handle.
    with open(name, mode) as f:
        f.writelines(lines)
    return name
26
def remove_tempfiles(*names):
    """Pass each of the given file names, in order, to safe_unlink()."""
    for path in names:
        safe_unlink(path)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +000030
Collin Winterbf618562007-04-07 04:40:43 +000031class BufferSizesTests(unittest.TestCase):
32 def test_buffer_sizes(self):
33 # First, run the tests with default and teeny buffer size.
34 for round, bs in (0, 0), (1, 30):
35 try:
36 t1 = writeTmp(1, ["Line %s of file 1\n" % (i+1) for i in range(15)])
37 t2 = writeTmp(2, ["Line %s of file 2\n" % (i+1) for i in range(10)])
38 t3 = writeTmp(3, ["Line %s of file 3\n" % (i+1) for i in range(5)])
39 t4 = writeTmp(4, ["Line %s of file 4\n" % (i+1) for i in range(1)])
40 self.buffer_size_test(t1, t2, t3, t4, bs, round)
41 finally:
42 remove_tempfiles(t1, t2, t3, t4)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +000043
Collin Winterbf618562007-04-07 04:40:43 +000044 def buffer_size_test(self, t1, t2, t3, t4, bs=0, round=0):
45 pat = re.compile(r'LINE (\d+) OF FILE (\d+)')
Neal Norwitz0d4c06e2007-04-25 06:30:05 +000046
Collin Winterbf618562007-04-07 04:40:43 +000047 start = 1 + round*6
48 if verbose:
49 print '%s. Simple iteration (bs=%s)' % (start+0, bs)
50 fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
Tim Peters3230d5c2001-07-11 22:21:17 +000051 lines = list(fi)
Tim Peters3230d5c2001-07-11 22:21:17 +000052 fi.close()
Collin Winterbf618562007-04-07 04:40:43 +000053 self.assertEqual(len(lines), 31)
54 self.assertEqual(lines[4], 'Line 5 of file 1\n')
55 self.assertEqual(lines[30], 'Line 1 of file 4\n')
56 self.assertEqual(fi.lineno(), 31)
57 self.assertEqual(fi.filename(), t4)
Tim Peters3230d5c2001-07-11 22:21:17 +000058
Collin Winterbf618562007-04-07 04:40:43 +000059 if verbose:
60 print '%s. Status variables (bs=%s)' % (start+1, bs)
61 fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
62 s = "x"
63 while s and s != 'Line 6 of file 2\n':
64 s = fi.readline()
65 self.assertEqual(fi.filename(), t2)
66 self.assertEqual(fi.lineno(), 21)
67 self.assertEqual(fi.filelineno(), 6)
Benjamin Peterson5c8da862009-06-30 22:57:08 +000068 self.assertFalse(fi.isfirstline())
69 self.assertFalse(fi.isstdin())
Tim Peters3230d5c2001-07-11 22:21:17 +000070
Collin Winterbf618562007-04-07 04:40:43 +000071 if verbose:
72 print '%s. Nextfile (bs=%s)' % (start+2, bs)
73 fi.nextfile()
74 self.assertEqual(fi.readline(), 'Line 1 of file 3\n')
75 self.assertEqual(fi.lineno(), 22)
76 fi.close()
Tim Peters3230d5c2001-07-11 22:21:17 +000077
Collin Winterbf618562007-04-07 04:40:43 +000078 if verbose:
79 print '%s. Stdin (bs=%s)' % (start+3, bs)
80 fi = FileInput(files=(t1, t2, t3, t4, '-'), bufsize=bs)
81 savestdin = sys.stdin
82 try:
83 sys.stdin = StringIO("Line 1 of stdin\nLine 2 of stdin\n")
84 lines = list(fi)
85 self.assertEqual(len(lines), 33)
86 self.assertEqual(lines[32], 'Line 2 of stdin\n')
87 self.assertEqual(fi.filename(), '<stdin>')
88 fi.nextfile()
89 finally:
90 sys.stdin = savestdin
Tim Peters3230d5c2001-07-11 22:21:17 +000091
Collin Winterbf618562007-04-07 04:40:43 +000092 if verbose:
93 print '%s. Boundary conditions (bs=%s)' % (start+4, bs)
94 fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
95 self.assertEqual(fi.lineno(), 0)
96 self.assertEqual(fi.filename(), None)
97 fi.nextfile()
98 self.assertEqual(fi.lineno(), 0)
99 self.assertEqual(fi.filename(), None)
Tim Peters3230d5c2001-07-11 22:21:17 +0000100
Collin Winterbf618562007-04-07 04:40:43 +0000101 if verbose:
102 print '%s. Inplace (bs=%s)' % (start+5, bs)
103 savestdout = sys.stdout
104 try:
105 fi = FileInput(files=(t1, t2, t3, t4), inplace=1, bufsize=bs)
106 for line in fi:
107 line = line[:-1].upper()
108 print line
109 fi.close()
110 finally:
111 sys.stdout = savestdout
Tim Peters3230d5c2001-07-11 22:21:17 +0000112
Collin Winterbf618562007-04-07 04:40:43 +0000113 fi = FileInput(files=(t1, t2, t3, t4), bufsize=bs)
114 for line in fi:
115 self.assertEqual(line[-1], '\n')
116 m = pat.match(line[:-1])
117 self.assertNotEqual(m, None)
118 self.assertEqual(int(m.group(1)), fi.filelineno())
119 fi.close()
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000120
Collin Winterbf618562007-04-07 04:40:43 +0000121class FileInputTests(unittest.TestCase):
122 def test_zero_byte_files(self):
123 try:
124 t1 = writeTmp(1, [""])
125 t2 = writeTmp(2, [""])
126 t3 = writeTmp(3, ["The only line there is.\n"])
127 t4 = writeTmp(4, [""])
128 fi = FileInput(files=(t1, t2, t3, t4))
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000129
Collin Winterbf618562007-04-07 04:40:43 +0000130 line = fi.readline()
131 self.assertEqual(line, 'The only line there is.\n')
132 self.assertEqual(fi.lineno(), 1)
133 self.assertEqual(fi.filelineno(), 1)
134 self.assertEqual(fi.filename(), t3)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000135
Collin Winterbf618562007-04-07 04:40:43 +0000136 line = fi.readline()
Benjamin Peterson5c8da862009-06-30 22:57:08 +0000137 self.assertFalse(line)
Collin Winterbf618562007-04-07 04:40:43 +0000138 self.assertEqual(fi.lineno(), 1)
139 self.assertEqual(fi.filelineno(), 0)
140 self.assertEqual(fi.filename(), t4)
141 fi.close()
142 finally:
143 remove_tempfiles(t1, t2, t3, t4)
Georg Brandle4662172006-02-19 09:51:27 +0000144
Collin Winterbf618562007-04-07 04:40:43 +0000145 def test_files_that_dont_end_with_newline(self):
146 try:
147 t1 = writeTmp(1, ["A\nB\nC"])
148 t2 = writeTmp(2, ["D\nE\nF"])
149 fi = FileInput(files=(t1, t2))
150 lines = list(fi)
151 self.assertEqual(lines, ["A\n", "B\n", "C", "D\n", "E\n", "F"])
152 self.assertEqual(fi.filelineno(), 3)
153 self.assertEqual(fi.lineno(), 6)
154 finally:
155 remove_tempfiles(t1, t2)
Georg Brandl67e9fb92006-02-19 13:56:17 +0000156
Collin Winterbf618562007-04-07 04:40:43 +0000157 def test_unicode_filenames(self):
158 try:
159 t1 = writeTmp(1, ["A\nB"])
160 encoding = sys.getfilesystemencoding()
161 if encoding is None:
162 encoding = 'ascii'
163 fi = FileInput(files=unicode(t1, encoding))
164 lines = list(fi)
165 self.assertEqual(lines, ["A\n", "B"])
166 finally:
167 remove_tempfiles(t1)
Georg Brandlc029f872006-02-19 14:12:34 +0000168
Collin Winterbf618562007-04-07 04:40:43 +0000169 def test_fileno(self):
170 try:
171 t1 = writeTmp(1, ["A\nB"])
172 t2 = writeTmp(2, ["C\nD"])
173 fi = FileInput(files=(t1, t2))
174 self.assertEqual(fi.fileno(), -1)
175 line = fi.next()
176 self.assertNotEqual(fi.fileno(), -1)
177 fi.nextfile()
178 self.assertEqual(fi.fileno(), -1)
179 line = list(fi)
180 self.assertEqual(fi.fileno(), -1)
181 finally:
182 remove_tempfiles(t1, t2)
Georg Brandlc98eeed2006-02-19 14:57:47 +0000183
Collin Winterbf618562007-04-07 04:40:43 +0000184 def test_opening_mode(self):
185 try:
186 # invalid mode, should raise ValueError
187 fi = FileInput(mode="w")
188 self.fail("FileInput should reject invalid mode argument")
189 except ValueError:
190 pass
191 try:
192 # try opening in universal newline mode
193 t1 = writeTmp(1, ["A\nB\r\nC\rD"], mode="wb")
194 fi = FileInput(files=t1, mode="U")
195 lines = list(fi)
196 self.assertEqual(lines, ["A\n", "B\n", "C\n", "D"])
197 finally:
198 remove_tempfiles(t1)
199
200 def test_file_opening_hook(self):
201 try:
202 # cannot use openhook and inplace mode
203 fi = FileInput(inplace=1, openhook=lambda f,m: None)
204 self.fail("FileInput should raise if both inplace "
205 "and openhook arguments are given")
206 except ValueError:
207 pass
208 try:
209 fi = FileInput(openhook=1)
210 self.fail("FileInput should check openhook for being callable")
211 except ValueError:
212 pass
213 try:
214 t1 = writeTmp(1, ["A\nB"], mode="wb")
215 fi = FileInput(files=t1, openhook=hook_encoded("rot13"))
216 lines = list(fi)
217 self.assertEqual(lines, ["N\n", "O"])
218 finally:
219 remove_tempfiles(t1)
220
Serhiy Storchaka68b8a942014-02-26 20:59:08 +0200221 def test_readline(self):
222 with open(TESTFN, 'wb') as f:
223 f.write('A\nB\r\nC\r')
224 # Fill TextIOWrapper buffer.
225 f.write('123456789\n' * 1000)
226 # Issue #20501: readline() shouldn't read whole file.
227 f.write('\x80')
228 self.addCleanup(safe_unlink, TESTFN)
229
230 fi = FileInput(files=TESTFN, openhook=hook_encoded('ascii'), bufsize=8)
Serhiy Storchakafd48a562014-03-03 21:16:27 +0200231 # The most likely failure is a UnicodeDecodeError due to the entire
232 # file being read when it shouldn't have been.
Serhiy Storchaka68b8a942014-02-26 20:59:08 +0200233 self.assertEqual(fi.readline(), u'A\n')
234 self.assertEqual(fi.readline(), u'B\r\n')
235 self.assertEqual(fi.readline(), u'C\r')
236 with self.assertRaises(UnicodeDecodeError):
237 # Read to the end of file.
238 list(fi)
239 fi.close()
240
241class Test_hook_encoded(unittest.TestCase):
242 """Unit tests for fileinput.hook_encoded()"""
243
244 def test_modes(self):
Serhiy Storchaka68b8a942014-02-26 20:59:08 +0200245 with open(TESTFN, 'wb') as f:
Serhiy Storchakafd48a562014-03-03 21:16:27 +0200246 # UTF-7 is a convenient, seldom used encoding
Serhiy Storchaka68b8a942014-02-26 20:59:08 +0200247 f.write('A\nB\r\nC\rD+IKw-')
Serhiy Storchaka68b8a942014-02-26 20:59:08 +0200248 self.addCleanup(safe_unlink, TESTFN)
249
250 def check(mode, expected_lines):
251 fi = FileInput(files=TESTFN, mode=mode,
252 openhook=hook_encoded('utf-7'))
253 lines = list(fi)
254 fi.close()
255 self.assertEqual(lines, expected_lines)
256
257 check('r', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
258 check('rU', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
259 check('U', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
260 check('rb', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
261
def test_main():
    """Hand this module's three test case classes to run_unittest()."""
    test_classes = (BufferSizesTests, FileInputTests, Test_hook_encoded)
    run_unittest(*test_classes)
Neal Norwitz0d4c06e2007-04-25 06:30:05 +0000264
if __name__ == "__main__":
    # Executed directly as a script: run the test suite.
    test_main()