blob: cff9a068224705e5cb79c993a35e8cb090b373e3 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Tim Petersad147202000-10-05 03:48:38 +00002
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
Skip Montanaro9a29e7a2002-03-26 11:39:26 +00005"""reindent [-d][-r][-v] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Christian Heimes7131fd92008-02-19 14:21:46 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000017If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
You can pass one or more file and/or directory paths.  When a path is a
directory, all .py files within it will be examined and, if the -r option
is given, so will the .py files in its subdirectories, recursively.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Christian Heimes7131fd92008-02-19 14:21:46 +000035
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
__version__ = "1"

import tokenize
import os
import shutil
import sys

# Option flags.  main() rebinds these from the command line and check()
# reads them while processing each path.
verbose = False     # -v / --verbose: print progress messages
recurse = False     # -r / --recurse: descend into subdirectories
dryrun = False      # -d / --dryrun: analyze only, never rewrite a file
makebackup = True   # cleared by -n / --nobackup: copy to "<file>.bak" first
Tim Petersad147202000-10-05 03:48:38 +000053
Florent Xiclunae4a33802010-08-09 12:24:20 +000054
def usage(msg=None):
    """Print ``msg`` on standard error.

    When ``msg`` is None (and only then) the module docstring is printed
    instead, which serves as the full usage text.
    """
    text = __doc__ if msg is None else msg
    print(text, file=sys.stderr)
59
Skip Montanaro165163f2004-03-27 18:43:56 +000060
def errprint(*args):
    """Write the arguments to standard error as one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.
    """
    stream = sys.stderr
    stream.write(" ".join(map(str, args)))
    stream.write("\n")
64
Florent Xiclunae4a33802010-08-09 12:24:20 +000065
def main():
    """Command-line entry point.

    Parses sys.argv, records the chosen options in the module-level flags
    and then either checks every path given, or -- when no path is given --
    acts as a filter from standard input to standard output.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    long_options = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_options)
    except getopt.error as msg:
        usage(msg)
        return

    for flag, _value in opts:
        if flag in ('-h', '--help'):
            # Help wins immediately; flags seen before it stay set, but
            # nothing gets processed.
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun = True
        elif flag in ('-r', '--recurse'):
            recurse = True
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose = True

    if args:
        for path in args:
            check(path)
        return

    # Filter mode: one source file in on stdin, transformed source out on
    # stdout.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
94
Florent Xiclunae4a33802010-08-09 12:24:20 +000095
def check(file):
    """Reindent the file at path ``file``, or walk it if it is a directory.

    For a directory that is not a symlink, every entry whose name ends in
    ".py" (case-insensitively) is checked; when the ``recurse`` flag is
    set, so is every subdirectory that is neither a symlink nor has a name
    starting with ".".

    For anything else the path is read as Python source using the encoding
    declared by its BOM or PEP 263 coding cookie (UTF-8 when neither is
    present), and rewritten with that same encoding if reindenting changed
    it.  Honours the ``verbose``, ``dryrun`` and ``makebackup`` flags.

    Returns True if the file needed changes, False if it did not, and None
    for a directory or when the file could not be read.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # Ask tokenize which encoding the source declares instead of
        # trusting the locale default, so the file round-trips unchanged
        # apart from the reindenting itself.
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as msg:
        # detect_encoding() reports an invalid coding declaration this way.
        errprint("%s: SyntaxError: %s" % (file, str(msg)))
        return
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Write back with the encoding the file was read with.
            with open(file, "w", encoding=encoding) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000139
Florent Xiclunae4a33802010-08-09 12:24:20 +0000140
Tim Petersba001a02001-10-04 19:44:10 +0000141def _rstrip(line, JUNK='\n \t'):
142 """Return line stripped of trailing spaces, tabs, newlines.
143
144 Note that line.rstrip() instead also strips sundry control characters,
145 but at least one known Emacs user expects to keep junk like that, not
146 mentioning Barry by name or anything <wink>.
147 """
148
149 i = len(line)
Florent Xiclunae4a33802010-08-09 12:24:20 +0000150 while i > 0 and line[i - 1] in JUNK:
Tim Petersba001a02001-10-04 19:44:10 +0000151 i -= 1
152 return line[:i]
153
Florent Xiclunae4a33802010-08-09 12:24:20 +0000154
class Reindenter:
    """Reindent one Python source to 4 spaces per indent level.

    Construct it from a readable text file object, call run() to compute
    the transformed program, then write() to emit it.
    """

    def __init__(self, f):
        # Tokenizer-callback state: find_stmt is nonzero while the next
        # "real" token would start a fresh statement; level is the current
        # block nesting depth.
        self.find_stmt = 1
        self.level = 0

        # The input exactly as read; run() compares its result against it.
        self.raw = f.readlines()

        # Normalized lines: trailing blanks stripped, tabs expanded, and
        # exactly one "\n" at the end, so an all-blank line is precisely
        # "\n".  The None in slot 0 lets tokenize's 1-based line numbers
        # index this list directly.
        normalized = [None]
        for text in self.raw:
            normalized.append(_rstrip(text).expandtabs() + "\n")
        self.lines = normalized
        self.index = 1  # slot in self.lines that getline() hands out next

        # (lineno, indentlevel) pairs, one per statement and per comment
        # line.  Comment lines get indentlevel -1, because tokenize cannot
        # say where they belong; run() has to work that out.
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns True when the result differs from the raw input lines.
        """
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        # Drop empty lines at the end of the file.
        lines = self.lines
        while lines and lines[-1] == "\n":
            del lines[-1]

        # Sentinel entry so each real entry has a successor marking where
        # its chunk of lines ends.
        stats = self.stats
        stats.append((len(lines), 0))
        last = len(stats) - 1

        # Leading-space count seen -> leading-space count most recently
        # chosen for it.
        remembered = {}
        # The program after transformation.
        out = self.after = []

        # Leading empty lines are copied as they are; there is nothing to
        # decide until a line with something on it shows up.
        out.extend(lines[1:stats[0][0]])

        for pos in range(last):
            start, depth = stats[pos]
            stop = stats[pos + 1][0]
            have = getlspace(lines[start])

            if depth >= 0:
                # An ordinary statement: tokenize told us its depth.
                want = depth * 4
            elif not have:
                # A comment starting in column 0 stays there.
                want = 0
            else:
                # An indented comment line.  First choice: whatever this
                # same indentation was most recently mapped to.
                want = remembered.get(have, -1)
                if want < 0:
                    # Otherwise it probably belongs to the next real
                    # statement -- but only that one is consulted.
                    for j in range(pos + 1, last):
                        later_line, later_depth = stats[j]
                        if later_depth >= 0:
                            if have == getlspace(lines[later_line]):
                                want = later_depth * 4
                            break
                if want < 0:
                    # Perhaps it is a comment hanging off the preceding
                    # statement; shift it by as much as that statement's
                    # first line was shifted.
                    for j in reversed(range(pos)):
                        earlier_line, earlier_depth = stats[j]
                        if earlier_depth >= 0:
                            moved = (getlspace(out[earlier_line - 1]) -
                                     getlspace(lines[earlier_line]))
                            want = have + moved
                            break
                if want < 0:
                    # Out of ideas -- leave it where it is.
                    want = have

            assert want >= 0
            remembered[have] = want
            shift = want - have
            chunk = lines[start:stop]
            if shift == 0 or have == 0:
                out.extend(chunk)
            elif shift > 0:
                # Indent further, but never put blanks on an empty line.
                pad = " " * shift
                for text in chunk:
                    out.append(text if text == "\n" else pad + text)
            else:
                # Dedent, removing no more blanks than a line really has.
                for text in chunk:
                    out.append(text[min(getlspace(text), -shift):])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines computed by run() to ``f``."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once they are used up."""
        if self.index < len(self.lines):
            text = self.lines[self.index]
            self.index += 1
            return text
        return ""

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking depth and recording line starts.

        Appends (lineno, level) to self.stats for the first token of each
        statement, and (lineno, -1) for each comment met while a new
        statement is still being looked for.
        """
        if type in (NEWLINE, INDENT, DEDENT):
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1
            if type == INDENT:
                self.level += 1
            elif type == DEDENT:
                self.level -= 1
            return

        if type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # Still looking for a new statement, so find_stmt is left
                # alone.
            return

        if type == NL:
            return

        if self.find_stmt:
            # The first "real" token after a NEWLINE: it starts the next
            # program statement, unless it is the ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Tim Petersad147202000-10-05 03:48:38 +0000299
Florent Xiclunae4a33802010-08-09 12:24:20 +0000300
# Count the blanks a line starts with.
def getlspace(line):
    """Return how many space characters ``line`` begins with.

    Only " " counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
307
Florent Xiclunae4a33802010-08-09 12:24:20 +0000308
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()