# blob: d45aeca960b9361b52dbdbf97cbed2d2399868ca [file] [log] [blame]
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty. If a filename is '-' it
is also replaced by sys.stdin. To specify an alternative list of
filenames, pass it as the argument to input(). A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin. Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read. Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect. After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode. If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module. In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior. The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place. If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines. Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline(). A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- fileno()
- isatty()
- read(), read(size), even readlines()

"""
81
import os
import sys

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "isfirstline", "isstdin", "FileInput"]

# The FileInput instance created by input(); the module-level functions
# delegate to it.  None until input() is called and again after close().
_state = None

# Size hint handed to file.readlines() when no bufsize is given.
DEFAULT_BUFSIZE = 8*1024
90
def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Create an instance of the FileInput class.  The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration.  The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if the previous global instance still has a
    file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state
104
def close():
    """Close the sequence and forget the module-level state."""
    global _state
    # Detach the global first so the module is already reset when the
    # instance's own close() runs.
    active, _state = _state, None
    if active:
        active.close()
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000112
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count.  The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file.  After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
126
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
135
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0.  After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
145
def filelineno():
    """
    Return the line number in the current file.  Before the first line
    has been read, returns 0.  After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
155
def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
164
def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
173
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), nextfile()
    and close() correspond to the functions of the same name in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        """Set up the list of files to read; nothing is opened yet.

        files   -- a single filename, a sequence of filenames, or None to
                   use sys.argv[1:]; an empty list means standard input
        inplace -- if true, each file is renamed to a backup and
                   sys.stdout is redirected to a new file of the same name
        backup  -- extension of the backup file; if empty, os.extsep+"bak"
                   is used and the backup is removed when the file is closed
        bufsize -- size hint passed to readlines(); 0 selects
                   DEFAULT_BUFSIZE
        """
        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the files not yet read."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve straight from the buffer without the extra
        # call into readline().
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Same method under the name the newer iterator protocol looks up.
    __next__ = next

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far."""
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        Each step sits in a try/finally so that a failing close() does
        not prevent the remaining cleanup from running.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                # sys.stdin is not ours to close.
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is only kept when an extension was requested.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next line, or "" once every file is exhausted."""
        # A loop rather than recursion: a long run of empty files must
        # not grow the call stack.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                self.nextfile()

    def _open_next(self):
        """Open the first pending file, setting up in-place output if asked."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep + "bak"))
            try:
                os.unlink(self._backupfilename)
            except os.error:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, "r")
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except (AttributeError, OSError):
                # Mode unavailable: create the output with default
                # permissions.
                self._output = open(self._filename, "w")
            else:
                # Recreate the output with the mode of the original file.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    os.chmod(self._filename, perm)
                except (AttributeError, OSError):
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative count of lines read."""
        return self._lineno

    def filelineno(self):
        """Return the count of lines read from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if exactly one line of the current file was read."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000341
def _test():
    """Command-line self-test: echo every input line with its counters.

    Options parsed from sys.argv: -i turns on in-place mode and -b EXT
    sets the backup extension; the remaining arguments are the files.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i': inplace = 1
        if o == '-b': backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n': line = line[:-1]
        if line[-1:] == '\r': line = line[:-1]
        # Single parenthesized argument: prints the same text whether
        # print is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   isfirstline() and "*" or "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()