blob: 9bc17001d3c6f49a81f235e77b509a92aa1dc74c [file] [log] [blame]
Guido van Rossum7d5b99d1997-11-21 17:12:59 +00001"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5 import fileinput
6 for line in fileinput.input():
7 process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty. If a filename is '-' it
11is also replaced by sys.stdin. To specify an alternative list of
12filenames, pass it as the argument to input(). A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin. Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read. Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect. After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
31All files are opened in text mode. If an I/O error occurs during
32opening or reading a file, the IOError exception is raised.
33
34If sys.stdin is used more than once, the second and further use will
35return no lines, except perhaps for interactive use, or if it has been
36explicitly reset (e.g. using sys.stdin.seek(0)).
37
38Empty files are opened and immediately closed; the only time their
39presence in the list of filenames is noticeable at all is when the
40last file opened is empty.
41
42It is possible that the last line of a file doesn't end in a newline
43character; otherwise lines are returned including the trailing
44newline.
45
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
53
54Optional in-place filtering: if the keyword argument inplace=1 is
55passed to input() or to the FileInput constructor, the file is moved
56to a backup file and standard output is directed to the input file.
57This makes it possible to write a filter that rewrites its input file
58in place. If the keyword argument backup=".<some extension>" is also
59given, it specifies the extension for the backup file, and the backup
60file remains around; by default, the extension is ".bak" and it is
61deleted when the output file is closed. In-place filtering is
62disabled when standard input is read. XXX The current implementation
63does not work for MS-DOS 8+3 filesystems.
64
Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines.  Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline().  A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size (DEFAULT_BUFSIZE, 8*1024).
71
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000072XXX Possible additions:
73
74- optional getopt argument processing
75- specify open mode ('r' or 'rb')
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000076- fileno()
77- isatty()
78- read(), read(size), even readlines()
79
80"""
81
Guido van Rossumdcb85831999-10-18 21:41:43 +000082import sys, os, stat
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000083
# The FileInput instance shared by the module-level convenience functions.
# input() stores a new instance here and close() resets it to None.
_state = None

# Size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024
87
def input(files=None, inplace=0, backup="", bufsize=0):
    """Create the module-wide FileInput instance and return it.

    The arguments are passed unchanged to the FileInput constructor.  The
    new instance is stored in the module global ``_state`` so that the
    other module-level functions operate on it.

    Raises RuntimeError if a previous input() is still active, i.e. the
    stored instance currently has a file open.
    """
    global _state
    if _state and _state._file:
        # Call form of raise: the "raise Class, value" statement form is
        # rejected by Python 3, while this spelling works everywhere.
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state
94
def close():
    """Close the module-wide FileInput instance, if there is one.

    The module global ``_state`` is reset to None before the instance's
    own close() method is invoked.
    """
    global _state
    # Detach the instance first so the module is already "inactive" by the
    # time the instance is closed.
    active, _state = _state, None
    if active:
        active.close()
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000101
def nextfile():
    """Close the current file of the active input() so reading moves on.

    Delegates to the nextfile() method of the module-wide FileInput
    instance and returns its result.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.nextfile()
106
def filename():
    """Return the name of the file the last line was read from.

    Delegates to the filename() method of the module-wide FileInput
    instance.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.filename()
111
def lineno():
    """Return the cumulative line number of the line just read.

    Delegates to the lineno() method of the module-wide FileInput
    instance.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.lineno()
116
def filelineno():
    """Return the line number, within its own file, of the line just read.

    Delegates to the filelineno() method of the module-wide FileInput
    instance.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.filelineno()
121
def isfirstline():
    """Return true if the line just read is the first line of its file.

    Delegates to the isfirstline() method of the module-wide FileInput
    instance.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.isfirstline()
126
def isstdin():
    """Return true if the line just read came from sys.stdin.

    Delegates to the isstdin() method of the module-wide FileInput
    instance.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        # Call form of raise; the "raise Class, value" statement form is a
        # syntax error on Python 3.
        raise RuntimeError("no active input()")
    return _state.isstdin()
131
class FileInput:
    """Read the lines of a list of files as one continuous sequence.

    Constructor arguments:

    files   -- a single file name, a sequence of file names, or None to
               use sys.argv[1:].  An empty list selects standard input,
               and the name '-' also stands for sys.stdin.
    inplace -- when true, each file is renamed to a backup file and
               sys.stdout is redirected to a new file with the original
               name while that file is being read.
    backup  -- extension of the backup file.  When empty, ".bak" is used
               and the backup is deleted once the file is finished.
    bufsize -- size hint passed to readlines(); 0 selects
               DEFAULT_BUFSIZE.

    Lines are fetched with readline() or by strictly sequential indexing
    (which is what a ``for`` loop does); the two styles must not be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # isinstance() rather than an exact type comparison, so that
        # string subclasses are also treated as a single file name instead
        # of being split into characters by tuple().
        if isinstance(files, type('')):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files              # names still waiting to be opened
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None          # original sys.stdout (in-place mode)
        self._output = None              # replacement sys.stdout (in-place mode)
        self._filename = None
        self._lineno = 0                 # cumulative count of lines returned
        self._filelineno = 0             # count of lines from the current file
        self._file = None                # currently open input file, if any
        self._isstdin = 0
        self._backupfilename = None
        self._buffer = []                # lines from the last readlines() call
        self._bufindex = 0               # next unread position in _buffer

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard all remaining file names."""
        self.nextfile()
        self._files = ()

    def __getitem__(self, i):
        """Return line number i; i must equal the number of lines read so far.

        Raises RuntimeError when lines are requested out of order and
        IndexError when the input is exhausted (which ends a ``for`` loop).
        """
        # Validate the index before touching the buffer; otherwise an
        # out-of-order access would silently return whatever line happens
        # to be buffered and only fail once the buffer runs dry.
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        # Fast path, duplicated from readline() to save a method call for
        # every buffered line.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Finish with the current file so the next read starts the next one.

        Restores sys.stdout, closes the in-place output file and the input
        file (sys.stdin is never closed), removes the backup file unless a
        backup extension was requested, and empties the line buffer.  The
        line counters and the reported file name are left untouched.
        """
        saved_stdout = self._savestdout
        self._savestdout = None
        if saved_stdout:
            sys.stdout = saved_stdout

        output = self._output
        self._output = None
        # The nested try/finally blocks make sure the remaining cleanup
        # still happens when one of the close() calls raises.
        try:
            if output:
                output.close()
        finally:
            infile = self._file
            self._file = None
            try:
                if infile and not self._isstdin:
                    infile.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is
                    # simply left behind.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = 0
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next line of input, or "" once every file is exhausted.

        Lines are returned exactly as readlines() produced them, so they
        keep their trailing newline when the file has one.
        """
        # A loop rather than recursion: every empty file costs one more
        # pass, and a long run of empty files must not hit the interpreter's
        # recursion limit.
        while 1:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is used up; close it and try the next one.
                self.nextfile()

    def _open_next(self):
        """Open the first pending file name and make it the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = 1
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or ".bak"))
            # Remove a stale backup so the rename below cannot collide
            # with it; it is fine if there was none.
            try:
                os.unlink(self._backupfilename)
            except OSError:
                pass
            # The next few lines may raise IOError/OSError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, "r")
            try:
                perm = os.fstat(self._file.fileno())[stat.ST_MODE]
            except (AttributeError, OSError):
                # Mode of the original is unavailable (presumably os.fstat
                # is missing on some platforms); fall back to a plain
                # open() with default permissions.
                self._output = open(self._filename, "w")
            else:
                # Recreate the file with the permission bits of the
                # original.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    os.chmod(self._filename, perm)
                except (AttributeError, OSError):
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def filename(self):
        """Return the name of the file most recently opened for reading."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines returned so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines returned from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if exactly one line has been read from the current file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000280
def _test():
    """Command-line self-test: number and echo every input line.

    Options taken from sys.argv: ``-i`` turns on in-place filtering and
    ``-b EXT`` sets the backup extension.  The remaining arguments are the
    files to read.  Each line is written as
    ``<lineno>: <filename>[<filelineno>]<*> <text>``, where ``*`` marks the
    first line of a file, followed by a final summary line.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        # sys.stdout.write() instead of the print statement: it is valid on
        # every Python version and, because sys.stdout is looked up at call
        # time, still follows the in-place redirection.
        sys.stdout.write("%d: %s[%d]%s %s\n" % (lineno(), filename(),
                                                filelineno(), marker, line))
    sys.stdout.write("%d: %s[%d]\n" % (lineno(), filename(), filelineno()))
295
# Run the command-line self-test when this module is executed as a script.
if __name__ == '__main__':
    _test()