#! /usr/bin/env python3

# Released to the public domain, by Tim Peters, 03 October 2000.
4
"""reindent [-d][-r][-n][-v] [ path ... ]

-d (--dryrun)   Dry run. Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse. Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose. Print informative msgs; else no output.
-h (--help)     Help. Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When given a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""
41
# Version string of this script.
__version__ = "1"
43
44import tokenize
Florent Xiclunae4a33802010-08-09 12:24:20 +000045import os
46import shutil
Tim Petersad147202000-10-05 03:48:38 +000047import sys
48
# Module-level option flags.  main() sets them from the command line and
# check() reads them.
verbose = False     # print progress messages while working
recurse = False     # descend into (non-hidden, non-symlink) subdirectories
dryrun = False      # analyze only; never rewrite or back up files
makebackup = True   # copy the original to "<file>.bak" before rewriting
Tim Petersad147202000-10-05 03:48:38 +000053
Florent Xiclunae4a33802010-08-09 12:24:20 +000054
def usage(msg=None):
    """Print *msg* to standard error.

    When *msg* is None, the module docstring is printed instead.
    """
    text = __doc__ if msg is None else msg
    print(text, file=sys.stderr)
59
Skip Montanaro165163f2004-03-27 18:43:56 +000060
def errprint(*args):
    """Write the arguments to standard error as one line.

    Each argument is converted with str(); the pieces are separated by
    single spaces and followed by a newline.
    """
    stream = sys.stderr
    stream.write(" ".join(map(str, args)))
    stream.write("\n")
64
Florent Xiclunae4a33802010-08-09 12:24:20 +000065
def main():
    """Command-line entry point.

    Parses sys.argv, records the option flags in the module-level globals,
    and then either reindents every path given (via check()) or, when no
    path is given, acts as a filter from standard input to standard output.
    An option-parsing error or -h/--help prints a message through usage()
    and returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    long_options = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_options)
    except getopt.error as msg:
        usage(msg)
        return

    # Options are applied in command-line order; help stops immediately.
    for option, _value in opts:
        if option in ('-h', '--help'):
            usage()
            return
        if option in ('-d', '--dryrun'):
            dryrun = True
        elif option in ('-r', '--recurse'):
            recurse = True
        elif option in ('-n', '--nobackup'):
            makebackup = False
        elif option in ('-v', '--verbose'):
            verbose = True

    if args:
        for path in args:
            check(path)
        return

    # No paths: filter mode, stdin -> stdout.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
94
Florent Xiclunae4a33802010-08-09 12:24:20 +000095
def check(file):
    """Reindent one file, or every candidate inside one directory.

    If *file* is a directory (and not a symlink), each entry whose name ends
    in ".py" (case-insensitively) is checked; when the global ``recurse``
    flag is set, non-hidden, non-symlink subdirectories are descended into
    as well.  Nothing is returned in that case.

    Otherwise *file* is read using the encoding announced by its BOM or
    coding cookie, run through Reindenter, and -- unless the global
    ``dryrun`` flag is set -- rewritten in place with its original newline
    convention, after an optional ".bak" copy (global ``makebackup``).

    Returns True if reindenting changed (or, in a dry run, would change)
    the file, False if it is already clean, and None when the file was
    skipped: it could not be read, its encoding declaration is invalid, or
    it mixes newline styles.  Skips are reported on standard error.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # Both opens and the encoding sniff are guarded, so that one unreadable
    # or badly-declared file is reported and skipped instead of aborting
    # the whole run with a traceback.
    try:
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as msg:
        # detect_encoding() raises SyntaxError for an invalid or
        # conflicting encoding declaration.
        errprint("%s: SyntaxError: %s" % (file, str(msg)))
        return
    except OSError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # A tuple means the file used more than one newline convention; there
    # is no single value to hand back to open(..., newline=...).
    newline = r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000146
Florent Xiclunae4a33802010-08-09 12:24:20 +0000147
Tim Petersba001a02001-10-04 19:44:10 +0000148def _rstrip(line, JUNK='\n \t'):
149 """Return line stripped of trailing spaces, tabs, newlines.
150
151 Note that line.rstrip() instead also strips sundry control characters,
152 but at least one known Emacs user expects to keep junk like that, not
153 mentioning Barry by name or anything <wink>.
154 """
155
156 i = len(line)
Florent Xiclunae4a33802010-08-09 12:24:20 +0000157 while i > 0 and line[i - 1] in JUNK:
Tim Petersba001a02001-10-04 19:44:10 +0000158 i -= 1
159 return line[:i]
160
Florent Xiclunae4a33802010-08-09 12:24:20 +0000161
class Reindenter:
    """Rewrite the indentation of one Python source to 4-space levels.

    Construct it from an open text file, call run() to compute the
    transformed program, then write() to emit it.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # The file's lines exactly as read.
        self.raw = f.readlines()

        # The same lines with trailing whitespace removed and tabs expanded.
        # Slot 0 is a placeholder so indices match tokenize's 1-based line
        # numbers.  An all-blank line is therefore exactly "\n".
        self.lines = [None] + [_rstrip(text).expandtabs() + "\n"
                               for text in self.raw]
        self.index = 1      # index into self.lines of the next line to feed

        # (lineno, indentlevel) pairs, one per statement and per comment
        # line.  Comment lines get indentlevel -1: tokenize has no opinion
        # about where they belong, so run() has to work that out.
        self.stats = []

        # Newline convention(s) seen while reading, kept so the caller can
        # write the result back without changing them.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented program into self.after.

        Returns True if the result differs from the raw input lines.
        """
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        # Drop trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()

        # Sentinel entry so every real entry has a successor.
        stats = self.stats
        stats.append((len(lines), 0))

        # Most recent mapping from "leading spaces seen" to "spaces wanted".
        have2want = {}
        # The transformed program.
        after = self.after = []

        # Leading empty lines are copied through untouched; nothing needs
        # doing until the first line with something on it.
        after.extend(lines[1:stats[0][0]])

        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            if thislevel < 0:
                # A comment line; unindented comments stay unindented.
                want = self._comment_indent(i, have, have2want) if have else 0
            else:
                want = thislevel * 4
            assert want >= 0
            have2want[have] = want

            diff = want - have
            chunk = lines[thisstmt:nextstmt]
            if diff == 0 or have == 0:
                after.extend(chunk)
            elif diff > 0:
                # Shift right, but never add spaces to a blank line.
                pad = " " * diff
                for text in chunk:
                    after.append(text if text == "\n" else pad + text)
            else:
                # Shift left, removing no more blanks than a line has.
                for text in chunk:
                    after.append(text[min(getlspace(text), -diff):])
        return self.raw != self.after

    def _comment_indent(self, i, have, have2want):
        """Choose the new indentation for the indented comment at stats[i].

        have is the comment's current count of leading spaces and have2want
        the mapping built so far by run().
        """
        stats, lines, after = self.stats, self.lines, self.after

        # If this indentation was seen before, reuse its latest mapping.
        want = have2want.get(have, -1)

        if want < 0:
            # It probably belongs to the next real statement: adopt that
            # statement's level if the two are currently aligned.
            for j in range(i + 1, len(stats) - 1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    if have == getlspace(lines[jline]):
                        want = jlevel * 4
                    break

        if want < 0:
            # Perhaps a hanging comment: shift it by the same amount as
            # the nearest preceding real statement was shifted.
            for j in range(i - 1, -1, -1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    shift = getlspace(after[jline - 1]) - getlspace(lines[jline])
                    want = have + shift
                    break

        if want < 0:
            # Still no luck -- leave it where it is.
            want = have
        return want

    def write(self, f):
        """Write the transformed program produced by run() to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once all lines are consumed."""
        pos = self.index
        if pos < len(self.lines):
            self.index = pos + 1
            return self.lines[pos]
        return ""

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts."""
        if type in (NEWLINE, INDENT, DEDENT):
            # After a NEWLINE, a statement or ENDMARKER eventually follows,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1
            if type == INDENT:
                self.level += 1
            elif type == DEDENT:
                self.level -= 1

        elif type == COMMENT:
            # Only a comment seen while looking for a statement is recorded;
            # find_stmt is left alone because the search continues.
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))

        elif type != NL and self.find_stmt:
            # First "real" token after a NEWLINE: the start of the next
            # statement, or the ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Tim Petersad147202000-10-05 03:48:38 +0000310
Florent Xiclunae4a33802010-08-09 12:24:20 +0000311
Tim Petersad147202000-10-05 03:48:38 +0000312# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with."""
    return len(line) - len(line.lstrip(" "))
318
Florent Xiclunae4a33802010-08-09 12:24:20 +0000319
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()