blob: 77487edd706787a31bbf3f2e96cad4f9af7c05ed [file] [log] [blame]
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin.  To specify an alternative list of
filenames, pass it as the argument to input().  A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode.  If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines.  Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline().  A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- fileno()
- isatty()
- read(), read(size), even readlines()

"""
81
Guido van Rossumdcb85831999-10-18 21:41:43 +000082import sys, os, stat
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000083
# Public names exported by "from fileinput import *".
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "isfirstline", "isstdin", "FileInput"]

# The FileInput instance created by the most recent input() call, or None
# when there is none; the module-level helper functions delegate to it.
_state = None

# Size hint passed to readlines() when the caller gives bufsize=0.
DEFAULT_BUFSIZE = 8*1024
90
def input(files=None, inplace=0, backup="", bufsize=0):
    """Create the module-global FileInput instance and return it.

    All arguments are passed through unchanged to the FileInput
    constructor.  The returned object is also stored in the module-level
    _state variable, which is what the helper functions (filename(),
    lineno(), ...) operate on.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    if _state and _state._file:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state
97
def close():
    """Close the module-global input sequence, if there is one.

    The global state is cleared before the instance's close() method is
    called, so the module is left with no active input() even if that
    call raises.  Calling close() with no active input() is a no-op.
    """
    global _state
    state = _state
    _state = None
    if state:
        state.close()
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000104
def nextfile():
    """Call nextfile() on the active input sequence and return its result.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
109
def filename():
    """Return filename() of the active input sequence.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
114
def lineno():
    """Return lineno() (the cumulative line number) of the active input.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
119
def filelineno():
    """Return filelineno() (line number within the file) of the active input.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
124
def isfirstline():
    """Return isfirstline() of the active input sequence.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
129
def isstdin():
    """Return isstdin() of the active input sequence.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
134
class FileInput:
    """Iterate over the lines of a list of files as one continuous stream.

    Constructor arguments:

    files   -- a single filename (a string) or a sequence of filenames.
               None means sys.argv[1:]; an empty list means ('-',).  The
               name '-' stands for sys.stdin.
    inplace -- if true, each named file is renamed to a backup file and
               sys.stdout is redirected to a new file with the original
               name while that file is being read.
    backup  -- extension for the backup file.  If empty, os.extsep + "bak"
               is used and the backup is deleted when the file is closed.
    bufsize -- size hint passed to readlines(); 0 selects DEFAULT_BUFSIZE.

    Lines are fetched in batches with readlines(bufsize) and handed out
    one at a time from an internal buffer.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # Set up all bookkeeping state first so that __del__ -> close()
        # finds every attribute even if normalising `files` raises below.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining filenames."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve directly from the buffer without the extra
        # method call into readline().
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spells the iterator method __next__; keep next() for
    # existing callers.
    __next__ = next

    def __getitem__(self, i):
        """Sequence-style access; `i` must equal the number of lines read.

        Raises RuntimeError for out-of-order access and IndexError once
        the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file, undoing any in-place redirection.

        Restores sys.stdout, closes the output and input files (sys.stdin
        itself is never closed), removes the backup file unless a backup
        extension was requested, and empties the line buffer.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        if output:
            output.close()

        current = self._file
        self._file = None
        if current and not self._isstdin:
            current.close()

        backupfilename = self._backupfilename
        self._backupfilename = None
        if backupfilename and not self._backup:
            # Best effort: a backup that cannot be removed is not an error.
            try:
                os.unlink(backupfilename)
            except OSError:
                pass

        self._isstdin = False
        self._buffer = []
        self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all files are exhausted."""
        # A loop rather than recursion, so that a long run of empty files
        # cannot exhaust the interpreter's recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; close it and try the next one.
                self.nextfile()

    def _open_next(self):
        """Open the first pending filename and make it the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None

        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
            return

        if not self._inplace:
            # This may raise IOError
            self._file = open(self._filename, "r")
            return

        self._backupfilename = (
            self._filename + (self._backup or os.extsep + "bak"))
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno())[stat.ST_MODE]
        except (AttributeError, OSError):
            # Permission bits unavailable: fall back to a plain open().
            self._output = open(self._filename, "w")
        else:
            # Re-create the file with the permission bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                os.chmod(self._filename, perm)
            except (AttributeError, OSError):
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file the last line was read from."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true iff the last line read was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true iff the current file is sys.stdin."""
        return self._isstdin
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000292
def _test():
    """Command-line self-test: echo every input line with its counters.

    Options: -i turns on in-place mode; -b EXT sets the backup extension.
    The remaining arguments are the files to read.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
307
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    _test()