blob: d1b7617e2500d8cc7310cd8f87ade83d521cbe79 [file] [log] [blame]
Guido van Rossum7d5b99d1997-11-21 17:12:59 +00001"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5 import fileinput
6 for line in fileinput.input():
7 process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty. If a filename is '-' it
11is also replaced by sys.stdin. To specify an alternative list of
12filenames, pass it as the argument to input(). A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin. Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read. Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect. After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
31All files are opened in text mode. If an I/O error occurs during
32opening or reading a file, the IOError exception is raised.
33
34If sys.stdin is used more than once, the second and further use will
35return no lines, except perhaps for interactive use, or if it has been
36explicitly reset (e.g. using sys.stdin.seek(0)).
37
38Empty files are opened and immediately closed; the only time their
39presence in the list of filenames is noticeable at all is when the
40last file opened is empty.
41
42It is possible that the last line of a file doesn't end in a newline
43character; otherwise lines are returned including the trailing
44newline.
45
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
53
54Optional in-place filtering: if the keyword argument inplace=1 is
55passed to input() or to the FileInput constructor, the file is moved
56to a backup file and standard output is directed to the input file.
57This makes it possible to write a filter that rewrites its input file
58in place. If the keyword argument backup=".<some extension>" is also
59given, it specifies the extension for the backup file, and the backup
60file remains around; by default, the extension is ".bak" and it is
61deleted when the output file is closed. In-place filtering is
62disabled when standard input is read. XXX The current implementation
63does not work for MS-DOS 8+3 filesystems.
64
65XXX Possible additions:
66
67- optional getopt argument processing
68- specify open mode ('r' or 'rb')
69- specify buffer size
70- fileno()
71- isatty()
72- read(), read(size), even readlines()
73
74"""
75
Guido van Rossumdcb85831999-10-18 21:41:43 +000076import sys, os, stat
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000077
# The FileInput instance created by input() and used by the module-level
# helper functions; None while no such instance exists.
_state = None
79
def input(files=None, inplace=0, backup=""):
    """Create the module-wide FileInput instance and return it.

    The arguments are handed unchanged to the FileInput constructor.
    The new instance is remembered in the module-level _state so that
    the other module-level functions operate on it.

    Raises RuntimeError if a previously created instance is still
    stored in _state and currently has a file open.
    """
    global _state
    still_reading = _state and _state._file
    if still_reading:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup)
    return _state
86
def close():
    """Close the module-wide FileInput instance, if there is one.

    The module-level _state is reset to None before the instance's
    close() method is called.
    """
    global _state
    # Detach the instance first, then close it.
    current, _state = _state, None
    if current:
        current.close()
Guido van Rossum7d5b99d1997-11-21 17:12:59 +000093
def nextfile():
    """Call nextfile() on the module-wide FileInput instance.

    Returns whatever that method returns.  Raises RuntimeError when no
    instance is stored in _state.
    """
    if _state:
        return _state.nextfile()
    raise RuntimeError("no active input()")
98
def filename():
    """Return filename() of the module-wide FileInput instance.

    Raises RuntimeError when no instance is stored in _state.
    """
    if _state:
        return _state.filename()
    raise RuntimeError("no active input()")
103
def lineno():
    """Return lineno() of the module-wide FileInput instance.

    Raises RuntimeError when no instance is stored in _state.
    """
    if _state:
        return _state.lineno()
    raise RuntimeError("no active input()")
108
def filelineno():
    """Return filelineno() of the module-wide FileInput instance.

    Raises RuntimeError when no instance is stored in _state.
    """
    if _state:
        return _state.filelineno()
    raise RuntimeError("no active input()")
113
def isfirstline():
    """Return isfirstline() of the module-wide FileInput instance.

    Raises RuntimeError when no instance is stored in _state.
    """
    if _state:
        return _state.isfirstline()
    raise RuntimeError("no active input()")
118
def isstdin():
    """Return isstdin() of the module-wide FileInput instance.

    Raises RuntimeError when no instance is stored in _state.
    """
    if _state:
        return _state.isstdin()
    raise RuntimeError("no active input()")
123
class FileInput:
    """Read lines sequentially from a list of files.

    files may be a single file name (a string), a sequence of file
    names, or None, in which case sys.argv[1:] is used.  An empty list
    is replaced by ('-',); the name '-' stands for sys.stdin.

    If inplace is true, each named file is renamed to a backup file
    (its name plus backup, or plus ".bak" when backup is empty) and
    sys.stdout is redirected to a new file with the original name while
    that file is being read.  The backup file is removed again when the
    file is closed, unless a backup extension was given.  Standard
    input is never filtered in place.
    """

    def __init__(self, files=None, inplace=0, backup=""):
        # Set up all per-instance state first, so that __del__() -> close()
        # finds every attribute even if normalising `files` below raises.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = None

        if isinstance(files, type('')):
            # A single file name is allowed in place of a sequence.
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        self.nextfile()
        self._files = ()

    def __getitem__(self, i):
        """Return line i, counting from 0 across all files.

        Only strictly sequential access is supported: i must equal the
        number of lines read so far, otherwise RuntimeError is raised.
        IndexError is raised when the end of the input is reached.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Close the current file so the next read starts the next file.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (sys.stdin is left open), and
        removes the backup file unless a backup extension was given.
        The file name and the line counters are left untouched.
        """
        # Detach everything from the instance before touching it, so a
        # failure below cannot leave half-closed objects behind.
        savestdout = self._savestdout
        self._savestdout = None
        output = self._output
        self._output = None
        infile = self._file
        self._file = None
        isstdin = self._isstdin
        self._isstdin = 0
        backupfilename = self._backupfilename
        self._backupfilename = None

        # Compare against None rather than testing truthiness: the saved
        # sys.stdout may be an arbitrary user-supplied object.
        if savestdout is not None:
            sys.stdout = savestdout

        # Nested try/finally: each cleanup step runs even when an
        # earlier one raises.
        try:
            if output:
                output.close()
        finally:
            try:
                if infile and not isstdin:
                    infile.close()
            finally:
                if backupfilename and not self._backup:
                    # Best effort only: a missing backup is not an error.
                    try:
                        os.unlink(backupfilename)
                    except os.error:
                        pass

    def readline(self):
        """Return the next line of input, or "" when all input is used up.

        Files are opened lazily.  Empty files are opened and closed
        again without producing a line.  May raise IOError (or
        os.error) while opening or reading a file.
        """
        # A loop rather than recursion: a long run of empty files must
        # not exhaust the interpreter's recursion limit.
        while 1:
            if not self._file:
                if not self._files:
                    return ""
                self._opennext()
            line = self._file.readline()
            if line:
                self._lineno = self._lineno + 1
                self._filelineno = self._filelineno + 1
                return line
            self.nextfile()

    def _opennext(self):
        # Open the first remaining file name and make it the current file.
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = None

        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = 1
            return

        if not self._inplace:
            # This may raise IOError
            self._file = open(self._filename, "r")
            return

        # In-place filtering: read from the backup copy, write to a new
        # file that takes over the original name.
        self._backupfilename = self._filename + (self._backup or ".bak")
        try:
            os.unlink(self._backupfilename)
        except os.error:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno())[stat.ST_MODE]
        except (AttributeError, os.error):
            # The mode cannot be determined (os.fstat unavailable or
            # failing); fall back to a plainly created file.
            self._output = open(self._filename, "w")
        else:
            # Create the output file with the permission bits of the
            # original file.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                os.chmod(self._filename, perm)
            except (AttributeError, os.error):
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file last opened (None before any read)."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the file last opened."""
        return self._filelineno

    def isfirstline(self):
        """Return true iff the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true iff the current file is sys.stdin."""
        return self._isstdin
Guido van Rossum7d5b99d1997-11-21 17:12:59 +0000251
def _test():
    """Command-line self-test: print every input line with its counters.

    Options are taken from sys.argv[1:]: -i switches on in-place
    filtering and -b EXT sets the backup extension; the remaining
    arguments are the file names.  Each line is printed with the
    cumulative line number, the file name, the per-file line number
    and a "*" marker on the first line of each file.  A final line
    shows the counters after all input has been read.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for opt, value in opts:
        if opt == '-i':
            inplace = 1
        elif opt == '-b':
            backup = value
    for line in input(args, inplace=inplace, backup=backup):
        # Drop one trailing newline, then one trailing carriage return.
        for ending in ('\n', '\r'):
            if line[-1:] == ending:
                line = line[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
266
# Run the self-test when this module is executed as a script.
if __name__ == '__main__':
    _test()