blob: bb4152076b7584d6906b7c7322709b5b909a9f4e [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Tim Petersad147202000-10-05 03:48:38 +00002
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
Skip Montanaro9a29e7a2002-03-26 11:39:26 +00005"""reindent [-d][-r][-v] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Christian Heimes7131fd92008-02-19 14:21:46 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000017If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
You can pass one or more file and/or directory paths. When a path names a
directory, all .py files within the directory will be examined, and, if the
-r option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Christian Heimes7131fd92008-02-19 14:21:46 +000035
36The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
# Version string of this script.
__version__ = "1"
43
44import tokenize
Florent Xiclunae4a33802010-08-09 12:24:20 +000045import os
46import shutil
Tim Petersad147202000-10-05 03:48:38 +000047import sys
48
# Module-level option flags.  main() rebinds them from the command line and
# check() reads them.
verbose = False      # set by -v/--verbose: print progress messages
recurse = False      # set by -r/--recurse: descend into subdirectories
dryrun = False       # set by -d/--dryrun: analyze only, never rewrite files
makebackup = True    # cleared by -n/--nobackup: copy original to ".bak" first
Tim Petersad147202000-10-05 03:48:38 +000053
Florent Xiclunae4a33802010-08-09 12:24:20 +000054
def usage(msg=None):
    """Print *msg* to standard error.

    When *msg* is omitted (or None), the module docstring -- which holds the
    command-line help text -- is printed instead.
    """
    text = __doc__ if msg is None else msg
    print(text, file=sys.stderr)
59
Skip Montanaro165163f2004-03-27 18:43:56 +000060
def errprint(*args):
    """Write the str() of each argument, space-separated, to standard error.

    A single newline is written after the arguments; with no arguments only
    the newline is written.
    """
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces) + "\n")
64
Florent Xiclunae4a33802010-08-09 12:24:20 +000065
def main():
    """Command-line entry point.

    Parses sys.argv[1:], updates the module-level option flags, and then
    either reindents every path argument via check() or, when no paths are
    given, acts as a filter from standard input to standard output.

    An option-parsing error or -h/--help prints a message through usage()
    and returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    long_names = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_names)
    except getopt.error as msg:
        usage(msg)
        return

    for flag, _value in opts:
        # Help stops processing right away; flags seen earlier stay set.
        if flag in ('-h', '--help'):
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun = True
        elif flag in ('-r', '--recurse'):
            recurse = True
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose = True

    if args:
        for path in args:
            check(path)
        return

    # No paths: filter mode, stdin -> stdout.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
94
Florent Xiclunae4a33802010-08-09 12:24:20 +000095
def check(file):
    """Reindent one file, or every eligible entry of one directory.

    If *file* is a directory (and not a symlink), each entry whose name ends
    in ".py" (case-insensitively) is checked; when the global ``recurse`` flag
    is set, non-symlink subdirectories whose names do not start with "." are
    descended into as well.  Nothing is returned for a directory.

    Otherwise *file* is read using the encoding reported by
    tokenize.detect_encoding() and run through a Reindenter.  Unless the
    global ``dryrun`` flag is set, a changed file is rewritten in place,
    after first being copied to ``file + ".bak"`` when ``makebackup`` is set.

    Returns True if reindenting changes the file, False if it does not, and
    None if the file could not be read or its source encoding could not be
    determined (the problem is reported on standard error).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # Both opens live inside the try so that an unreadable file is
        # reported and skipped instead of aborting the whole run.
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as msg:
        # detect_encoding() raises SyntaxError for an invalid coding cookie
        # or a first line that cannot be decoded.
        errprint("%s: SyntaxError: %s" % (file, str(msg)))
        return
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Write back with the same encoding the file was read with.
            with open(file, "w", encoding=encoding) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000141
Florent Xiclunae4a33802010-08-09 12:24:20 +0000142
Tim Petersba001a02001-10-04 19:44:10 +0000143def _rstrip(line, JUNK='\n \t'):
144 """Return line stripped of trailing spaces, tabs, newlines.
145
146 Note that line.rstrip() instead also strips sundry control characters,
147 but at least one known Emacs user expects to keep junk like that, not
148 mentioning Barry by name or anything <wink>.
149 """
150
151 i = len(line)
Florent Xiclunae4a33802010-08-09 12:24:20 +0000152 while i > 0 and line[i - 1] in JUNK:
Tim Petersba001a02001-10-04 19:44:10 +0000153 i -= 1
154 return line[:i]
155
Florent Xiclunae4a33802010-08-09 12:24:20 +0000156
class Reindenter:
    """Compute a 4-space-indented version of a Python source file.

    The constructor reads all lines from a file object, run() tokenizes
    them and builds the transformed program in ``self.after``, and write()
    emits that result to another file object.
    """

    def __init__(self, f):
        """Read every line from the file object *f* and set up state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in ``self.after``.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # Feed every token to tokeneater(), which fills in self.stats.
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        # Each stats entry owns the lines from its own line number up to
        # (but excluding) the next entry's line number.
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] has no dummy first entry, hence
                                # the jline - 1.
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy this entry's lines unchanged.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indent further, but keep blank lines blank.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedent, never removing more than the leading
                        # spaces the line actually has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines to the file object *f*.

        ``self.after`` is only created by run(), so run() must be called
        first.
        """
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        Appends (lineno, level) to ``self.stats`` for the first token of
        each statement, and (lineno, -1) for each comment seen while a new
        statement is still being looked for.  The keyword parameters only
        bind the tokenize constants as defaults.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Tim Petersad147202000-10-05 03:48:38 +0000301
Florent Xiclunae4a33802010-08-09 12:24:20 +0000302
def getlspace(line):
    """Return the number of leading space characters in *line*.

    Only the space character counts; tabs and other whitespace end the run.
    """
    return len(line) - len(line.lstrip(" "))
309
Florent Xiclunae4a33802010-08-09 12:24:20 +0000310
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()