# blob: 166c631689747dc5ca3bc8bf9f63a6a95a56ab51
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin and the optional arguments mode and
openhook are ignored.  To specify an alternative list of filenames,
pass it as the argument to input().  A single file name is also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode by default; you can override this by
setting the mode parameter to input() or FileInput.__init__().
If an I/O error occurs during opening or reading a file, the OSError
exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
deprecated __getitem__() method which implements the sequence behavior.
The sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

XXX Possible additions:

- optional getopt argument processing
- isatty()
- read(), read(size), even readlines()

"""

import io
import os
import sys

# Public names exported by ``from fileinput import *``.
__all__ = [
    "input", "close", "nextfile", "filename", "lineno", "filelineno",
    "fileno", "isfirstline", "isstdin", "FileInput", "hook_compressed",
    "hook_encoded",
]

# The FileInput instance created by the most recent input() call, or None
# when there is no active module-level input sequence.
_state = None

def input(files=None, inplace=False, backup="", *, mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if the previous instance still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook)
    return _state

def close():
    """Close the sequence.

    The module-level state is cleared before the instance is closed, so
    no input() is considered active afterwards even if closing raises.
    Does nothing when there is no active input().
    """
    global _state
    state = _state
    _state = None
    if state:
        state.close()

def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()

def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()

def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()

def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()

def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()

def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()

def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()

class FileInput:
    """FileInput([files[, inplace[, backup]]], *, mode="r", openhook=None)

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a deprecated __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=False, backup="", *,
                 mode="r", openhook=None):
        """Set up iteration over *files*.

        files    -- a single name (str or os.PathLike), an iterable of
                    names, or None to use sys.argv[1:]; an empty list
                    means ('-',), i.e. standard input.
        inplace  -- if true, each file is moved to a backup and sys.stdout
                    is redirected to a new file with the original name.
        backup   -- extension for the backup file; if empty, ".bak" is
                    used and the backup is removed when the file is closed.
        mode     -- 'r' or 'rb'.
        openhook -- optional callable(filename, mode) used to open files;
                    cannot be combined with inplace.

        Raises ValueError for an unsupported mode, for an openhook given
        together with inplace, or for a non-callable openhook.
        """
        # Initialise every attribute that close()/nextfile() reads before
        # any step that can raise, so that __del__ works on an instance
        # whose construction failed part-way (e.g. tuple(files) raising).
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._startlineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None

        if isinstance(files, str):
            files = (files,)
        elif isinstance(files, os.PathLike):
            files = (os.fspath(files), )
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        # restrict mode argument to reading modes
        if mode not in ('r', 'rb'):
            raise ValueError("FileInput opening mode must be 'r' or 'rb'")
        self._mode = mode
        # Matching mode for the in-place output file ('w' or 'wb').
        self._write_mode = mode.replace('r', 'w')
        if openhook:
            if inplace:
                raise ValueError("FileInput cannot use an opening hook in inplace mode")
            if not callable(openhook):
                raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and drop all files not yet read."""
        try:
            self.nextfile()
        finally:
            # Emptied even if nextfile() raised, so iteration cannot resume.
            self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, moving on to the next file as needed."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                # No file is open and none could be opened: input exhausted.
                raise StopIteration
            self.nextfile()
            # repeat with next file

    def __getitem__(self, i):
        """Deprecated sequential access: *i* must equal lineno().

        Raises RuntimeError when lines are accessed out of order and
        IndexError when the end of input is reached.
        """
        import warnings
        warnings.warn(
            "Support for indexing FileInput objects is deprecated. "
            "Use iterator protocol instead.",
            DeprecationWarning,
            stacklevel=2
        )
        if i != self.lineno():
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            # The StopIteration is an implementation detail; do not chain it.
            raise IndexError("end of input reached") from None

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        The nested try/finally blocks make sure each cleanup step runs
        even if an earlier close() raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                del self._readline  # restore FileInput._readline
            except AttributeError:
                pass
            try:
                # sys.stdin is never closed by FileInput.
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is kept only when an extension was requested.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False

    def readline(self):
        """Return the next line, or an empty str/bytes at end of input."""
        while True:
            line = self._readline()
            if line:
                self._filelineno += 1
                return line
            if not self._file:
                return line
            self.nextfile()
            # repeat with next file

    def _readline(self):
        """Open the next file and return its first line.

        While a file is open this method is hidden by an instance
        attribute bound to that file's readline(); nextfile() deletes the
        attribute again, so this body only runs when no file is open.
        Returns an empty str/bytes (matching the mode) when no files remain.
        """
        if not self._files:
            if 'b' in self._mode:
                return b''
            else:
                return ''
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._startlineno = self.lineno()
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            if 'b' in self._mode:
                self._file = getattr(sys.stdin, 'buffer', sys.stdin)
            else:
                self._file = sys.stdin
            self._isstdin = True
        else:
            if self._inplace:
                self._backupfilename = (
                    os.fspath(self._filename) + (self._backup or ".bak"))
                try:
                    os.unlink(self._backupfilename)
                except OSError:
                    pass
                # The next few lines may raise OSError
                os.rename(self._filename, self._backupfilename)
                self._file = open(self._backupfilename, self._mode)
                try:
                    perm = os.fstat(self._file.fileno()).st_mode
                except OSError:
                    self._output = open(self._filename, self._write_mode)
                else:
                    # Recreate the file with the original's permission bits.
                    mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
                    if hasattr(os, 'O_BINARY'):
                        mode |= os.O_BINARY

                    fd = os.open(self._filename, mode, perm)
                    self._output = os.fdopen(fd, self._write_mode)
                    try:
                        os.chmod(self._filename, perm)
                    except OSError:
                        pass
                self._savestdout = sys.stdout
                sys.stdout = self._output
            else:
                # This may raise OSError
                if self._openhook:
                    self._file = self._openhook(self._filename, self._mode)
                else:
                    self._file = open(self._filename, self._mode)
        self._readline = self._file.readline  # hide FileInput._readline
        return self._readline()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._startlineno + self._filelineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1.

        -1 is returned when no file is open or when the file object's
        fileno() raises ValueError.
        """
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin


def hook_compressed(filename, mode, *, encoding=None, errors=None):
    """Opening hook that transparently decompresses .gz and .bz2 files.

    The file type is chosen from the extension of *filename*; any other
    extension is opened with the built-in open().

    gzip.open() and bz2.BZ2File() treat mode 'r' as binary, so for a text
    *mode* the decompressed stream is wrapped in io.TextIOWrapper.  This
    makes compressed and uncompressed files yield the same type (str in
    'r' mode, bytes in 'rb' mode).

    *encoding* and *errors* are optional and keyword-only; they are passed
    to open() or to the text wrapper.
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.gz':
        import gzip
        stream = gzip.open(filename, mode)
    elif ext == '.bz2':
        import bz2
        stream = bz2.BZ2File(filename, mode)
    else:
        return open(filename, mode, encoding=encoding, errors=errors)

    # gzip and bz2 streams are binary by default.
    if "b" not in mode:
        stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors)
    return stream


def hook_encoded(encoding, errors=None):
    """Return an opening hook that opens files with the given encoding.

    The returned callable takes (filename, mode) and passes *encoding* and
    *errors* on to the built-in open().
    """
    def openhook(filename, mode):
        return open(filename, mode, encoding=encoding, errors=errors)
    return openhook


def _test():
    """Command-line self-test: echo every input line with its numbering.

    Options: -i turns on in-place mode; -b EXT sets the backup extension.
    The remaining arguments are the input files.  The first line of each
    file is marked with "*".
    """
    import getopt
    inplace = False
    backup = ""  # same default as input(); an extension string, not a flag
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = True
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing "\n" and then one trailing "\r", if present.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    _test()