#! /usr/bin/env python3

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
   (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
                           Default is the same as the original file.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

__version__ = "1"

import tokenize
import os
import shutil
import sys

# Option flags.  main() rebinds these from the command line; check() reads
# them while processing each path.
verbose = False     # -v / --verbose: print progress messages
recurse = False     # -r / --recurse: descend into subdirectories
dryrun = False      # -d / --dryrun: analyze only, never rewrite a file
makebackup = True   # cleared by -n / --nobackup: skip the ".bak" copy
# A specified newline to be used in the output (set by --newline option)
spec_newline = None


def usage(msg=None):
    """Print *msg* to standard error.

    msg may be any object print() accepts (main() passes a getopt error
    instance).  When it is None, the module docstring is printed instead.
    """
    if msg is None:
        msg = __doc__
    print(msg, file=sys.stderr)


def errprint(*args):
    """Write the arguments to standard error as one line.

    Each argument is converted with str(), the pieces are separated by a
    single space, and a newline is appended.
    """
    print(*args, file=sys.stderr)

def main():
    """Command-line entry point.

    Parses sys.argv and stores the selected options in the module-level
    flags.  With no path arguments, a single source is read from standard
    input and the transformed result is written to standard output;
    otherwise check() is called on every path given.

    Nothing is processed when option parsing fails (the getopt message is
    printed), when --newline is given a value other than CRLF or LF, or
    when -h/--help is given (the usage text is printed in both cases).
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    # Accepted --newline names and the line terminator each one selects.
    newline_names = {'CRLF': '\r\n', 'LF': '\n'}
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun = True
        elif o in ('-r', '--recurse'):
            recurse = True
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose = True
        elif o in ('--newline',):
            # The name is matched case-insensitively.
            if a.upper() not in newline_names:
                usage()
                return
            spec_newline = newline_names[a.upper()]
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin in, stdout out.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)


def check(file):
    """Reindent one file, or the eligible entries of one directory.

    For a directory that is not a symlink, every entry whose name ends in
    ".py" (case-insensitively) is checked; when the recurse flag is set,
    sub-directories that are not symlinks and whose names do not start
    with "." are checked as well.

    For anything else, the source encoding is detected, the file is read
    and reindented, and -- unless this is a dry run -- a changed file is
    rewritten in place, after copying the original to file + ".bak" when
    backups are enabled.

    Returns True when the file needed changes, False when it did not, and
    None for directories and for files that could not be processed (a
    message is written to standard error in the latter case).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not name.startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
    except SyntaxError as se:
        errprint("%s: SyntaxError: %s" % (file, str(se)))
        return
    except OSError as msg:
        # Opening for encoding detection can fail just like the text-mode
        # open below; report it the same way instead of crashing.
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return
    try:
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except OSError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # An explicit --newline wins; otherwise reuse what the file had.  A
    # tuple means the file mixed several newline styles.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False


def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Only the characters listed in JUNK are removed.  Note that
    line.rstrip() with no argument instead also strips sundry control
    characters, but at least one known Emacs user expects to keep junk
    like that, not mentioning Barry by name or anything <wink>.
    """
    return line.rstrip(JUNK)


class Reindenter:
    """Re-indent one Python source to 4 spaces per indentation level.

    The constructor reads every line from a text file object, run()
    computes the transformed program, and write() emits it.
    """

    def __init__(self, f):
        """Read all lines from *f* and set up the tokenizer state.

        f must provide readlines() and a ``newlines`` attribute.
        """
        self.find_stmt = True   # next token begins a fresh stmt?
        self.level = 0          # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the re-indented program and store it in self.after.

        Returns True when the result differs from the raw input lines.
        Exceptions raised by tokenize while scanning are not caught here.
        """
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank rather than padding.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own indent.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once all lines are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        The first five parameters are the fields of a tokenize token;
        slinecol is its (row, column) start.  Appends (row, level) to
        self.stats for the first token of each statement and (row, -1)
        for each comment seen while a statement start is still awaited.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = True

        elif type == INDENT:
            self.find_stmt = True
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = True
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = False
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))


# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters *line* starts with.

    Only " " counts; a leading tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()