blob: 3e9affb9c511b924f117244ee21a11db417eb7b7 [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
Skip Montanaro9a29e7a2002-03-26 11:39:26 +00005"""reindent [-d][-r][-v] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Facundo Batistaf88a0772008-02-17 16:21:13 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000017If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
You can pass one or more file and/or directory paths.  When a path is a
directory, all .py files within it are examined and, if the -r option is
given, so are the .py files in its subdirectories, recursively.
25
If output is not to standard output, reindent overwrites files in place,
first saving a copy of the original with a .bak extension (unless -n is
given).  If it finds nothing to change, the file is left alone.  If reindent
does change a file, the changed file is a fixed-point for future runs (i.e.,
running reindent on the resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Facundo Batistaf88a0772008-02-17 16:21:13 +000035
The backup file is a copy of the one that is being reindented.  The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer.  You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
42__version__ = "1"
43
44import tokenize
Facundo Batistaf88a0772008-02-17 16:21:13 +000045import os, shutil
Tim Petersad147202000-10-05 03:48:38 +000046import sys
47
# Module-level option flags.  main() updates them from the command line and
# check() reads them while processing each path.
verbose = 0        # bumped once per -v/--verbose; nonzero enables progress messages
recurse = 0        # bumped once per -r/--recurse; nonzero makes check() descend into subdirectories
dryrun = 0         # bumped once per -d/--dryrun; nonzero means analyze but never rewrite a file
makebackup = True  # cleared by -n/--nobackup; when true a ".bak" copy is made before rewriting
Tim Petersad147202000-10-05 03:48:38 +000052
def usage(msg=None):
    """Write the usage text (the module docstring) to standard error.

    If *msg* is not None, its string form is written on a line of its own
    before the usage text.
    """
    emit = sys.stderr.write
    if msg is not None:
        emit("%s\n" % (msg,))
    emit("%s\n" % (__doc__,))
57
def errprint(*args):
    """Write the str() of every argument to stderr on one line.

    Arguments are separated by single spaces and the line is terminated
    with a newline; with no arguments only the newline is written.
    """
    sys.stderr.write(" ".join(map(str, args)) + "\n")
64
def main():
    """Command-line driver: parse options, then reindent stdin or the paths.

    Options update the module-level flags verbose, recurse, dryrun and
    makebackup.  An option-parsing error, or -h/--help, prints the usage
    text and returns without processing anything.  With no path arguments
    the script acts as a filter from standard input to standard output;
    otherwise every path is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    long_options = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_options)
    except getopt.error as msg:
        usage(msg)
        return

    for option, _value in opts:
        if option in ('-h', '--help'):
            # Help wins as soon as it is seen: nothing gets processed.
            usage()
            return
        if option in ('-d', '--dryrun'):
            dryrun += 1
        elif option in ('-r', '--recurse'):
            recurse += 1
        elif option in ('-n', '--nobackup'):
            makebackup = False
        elif option in ('-v', '--verbose'):
            verbose += 1

    if args:
        for path in args:
            check(path)
        return

    # No paths given: filter mode, stdin -> stdout.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
93
94def check(file):
95 if os.path.isdir(file) and not os.path.islink(file):
96 if verbose:
97 print "listing directory", file
98 names = os.listdir(file)
99 for name in names:
100 fullname = os.path.join(file, name)
101 if ((recurse and os.path.isdir(fullname) and
Georg Brandl4aef7032008-11-07 08:56:27 +0000102 not os.path.islink(fullname) and
103 not os.path.split(fullname)[1].startswith("."))
Tim Petersad147202000-10-05 03:48:38 +0000104 or name.lower().endswith(".py")):
105 check(fullname)
106 return
107
108 if verbose:
109 print "checking", file, "...",
110 try:
111 f = open(file)
112 except IOError, msg:
113 errprint("%s: I/O Error: %s" % (file, str(msg)))
114 return
115
116 r = Reindenter(f)
117 f.close()
118 if r.run():
119 if verbose:
120 print "changed."
121 if dryrun:
122 print "But this is a dry run, so leaving it alone."
123 if not dryrun:
124 bak = file + ".bak"
Facundo Batistaf88a0772008-02-17 16:21:13 +0000125 if makebackup:
126 shutil.copyfile(file, bak)
127 if verbose:
128 print "backed up", file, "to", bak
Tim Petersad147202000-10-05 03:48:38 +0000129 f = open(file, "w")
130 r.write(f)
131 f.close()
132 if verbose:
133 print "wrote new", file
Brett Cannona8b09fd2008-03-18 17:25:13 +0000134 return True
Tim Petersad147202000-10-05 03:48:38 +0000135 else:
136 if verbose:
137 print "unchanged."
Brett Cannona8b09fd2008-03-18 17:25:13 +0000138 return False
Tim Petersad147202000-10-05 03:48:38 +0000139
Tim Petersba001a02001-10-04 19:44:10 +0000140def _rstrip(line, JUNK='\n \t'):
141 """Return line stripped of trailing spaces, tabs, newlines.
142
143 Note that line.rstrip() instead also strips sundry control characters,
144 but at least one known Emacs user expects to keep junk like that, not
145 mentioning Barry by name or anything <wink>.
146 """
147
148 i = len(line)
149 while i > 0 and line[i-1] in JUNK:
150 i -= 1
151 return line[:i]
152
class Reindenter:
    """Compute a 4-space-per-level reindented copy of a source file's lines.

    Typical use: build an instance from an open file object, call run()
    to compute the transformed lines (it reports whether anything
    changed), then call write() to emit them.
    """

    def __init__(self, f):
        """Read every line from *f* (via f.readlines()) and set up state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after, the reindented lines; return whether they differ.

        The lines are tokenized to fill self.stats, then each statement
        (together with the lines up to the next recorded statement or
        comment) is shifted so that it starts at 4 * its indent level.
        Comment lines have no level of their own, so theirs is guessed.
        Returns true iff the result differs from the raw input lines.
        """
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.  It gives the last real entry a "next" entry, so the
        # loop below can always read stats[i+1].
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is consulted,
                                # whether or not its indentation matched.
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy this entry's lines unchanged.
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every line of this entry (including any lines that
                # follow it before the next entry) by the same amount.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank; don't pad them.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never cut away more than the line's own
                        # leading spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to *f* using f.writelines()."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Token callback: track the indent level and record self.stats.

        Appends (sline, self.level) for the first token of each statement
        and (sline, -1) for each comment seen while a statement start is
        still awaited.  The keyword parameters only cache the tokenize
        constants as defaults.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
295
def getlspace(line):
    """Return the number of space characters at the start of *line*.

    Only the space character counts; a leading tab (or any other
    character) ends the run.
    """
    return len(line) - len(line.lstrip(" "))
302
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()