blob: 8557b5debd6e1b2721ce2d256a541faa0f802ef7 [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
Skip Montanaro9a29e7a2002-03-26 11:39:26 +00005"""reindent [-d][-r][-v] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Christian Heimes7131fd92008-02-19 14:21:46 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000017If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000022You can pass one or more file and/or directory paths. When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Christian Heimes7131fd92008-02-19 14:21:46 +000035
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
# Version identifier for this script.
__version__ = "1"
43
44import tokenize
Christian Heimes7131fd92008-02-19 14:21:46 +000045import os, shutil
Tim Petersad147202000-10-05 03:48:38 +000046import sys
47
# Module-level option flags.  main() updates these from the command line
# and check() consults them while processing each path.
verbose = 0        # bumped once per -v/--verbose; nonzero prints progress messages
recurse = 0        # bumped once per -r/--recurse; nonzero descends into subdirectories
dryrun = 0         # bumped once per -d/--dryrun; nonzero leaves files untouched
makebackup = True  # set to False by -n/--nobackup; controls the ".bak" copy
Tim Petersad147202000-10-05 03:48:38 +000052
def usage(msg=None):
    """Print the module usage text to standard error.

    If *msg* is not None it is printed first, on its own line, so that an
    option-parsing error appears ahead of the usage summary.
    """
    err = sys.stderr
    if msg is not None:
        print(msg, file=err)
    print(__doc__, file=err)
Skip Montanaro165163f2004-03-27 18:43:56 +000057
def errprint(*args):
    """Write the arguments to standard error on one line.

    Each argument is converted with str(), the pieces are separated by a
    single space, and a trailing newline is appended.
    """
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")
64
def main():
    """Command-line entry point.

    Parses sys.argv with getopt and updates the module-level option flags
    (verbose, recurse, dryrun, makebackup).  An option error prints the
    error plus usage and returns; -h/--help prints usage and returns.

    With no path arguments the script acts as a filter: source is read from
    sys.stdin, reindented, and written to sys.stdout.  Otherwise each path
    argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    short_flags = "drnvh"
    long_flags = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.error as msg:
        usage(msg)
        return

    # Options are applied in command-line order; the value part is unused
    # because none of the options takes an argument.
    for flag, _value in opts:
        if flag in ('-h', '--help'):
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun += 1
        elif flag in ('-r', '--recurse'):
            recurse += 1
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose += 1

    if args:
        for path in args:
            check(path)
        return

    # Filter mode: stdin -> stdout.
    r = Reindenter(sys.stdin)
    r.run()
    r.write(sys.stdout)
93
def check(file):
    """Reindent one file, or every eligible file beneath a directory.

    If *file* is a directory (and not a symlink), each entry whose name ends
    in ".py" (case-insensitively) is checked.  When the global ``recurse``
    flag is set, subdirectories that are not symlinks and whose names do not
    start with "." are descended into as well.  Nothing is returned for a
    directory.

    Otherwise *file* is opened and run through a Reindenter.  If the result
    differs from the original and this is not a dry run, the file is
    rewritten in place, first copying the original to ``file + ".bak"``
    when ``makebackup`` is true.  Progress messages go to standard output
    when ``verbose`` is set.

    Returns True if reindenting changes (or, in a dry run, would change)
    the file, False if it is already clean, and None if the file could not
    be opened (an error line is written to standard error in that case).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; the with-block
    # guarantees the handle is released even if that read raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Close the output file even if writing fails partway through.
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000139
Tim Petersba001a02001-10-04 19:44:10 +0000140def _rstrip(line, JUNK='\n \t'):
141 """Return line stripped of trailing spaces, tabs, newlines.
142
143 Note that line.rstrip() instead also strips sundry control characters,
144 but at least one known Emacs user expects to keep junk like that, not
145 mentioning Barry by name or anything <wink>.
146 """
147
148 i = len(line)
149 while i > 0 and line[i-1] in JUNK:
150 i -= 1
151 return line[:i]
152
class Reindenter:
    """Re-indent Python source read from a file-like object.

    Construct with an object providing readlines(), call run() to compute
    the reindented program (it returns whether anything changed), then
    write() to emit the result.
    """

    def __init__(self, f):
        # Tokenizer-driven state.
        self.find_stmt = 1      # next token begins a fresh stmt?
        self.level = 0          # current indent level

        # Raw file lines, exactly as read.
        self.raw = f.readlines()

        # Cleaned lines: trailing blanks removed, tabs expanded, newline
        # re-attached.  Slot 0 is a dummy so that tokenize's 1-based line
        # numbers index this list directly.  An all-blank line is "\n".
        cleaned = [None]
        for raw_line in self.raw:
            cleaned.append(_rstrip(raw_line).expandtabs() + "\n")
        self.lines = cleaned
        self.index = 1          # index into self.lines of next line

        # (lineno, indentlevel) pairs, one per statement and per comment
        # line.  Comment lines carry level -1: tokenize cannot tell us
        # where they belong, so run() has to work that out.
        self.stats = []

    def run(self):
        """Compute the reindented lines into self.after.

        Returns True if the result differs from the raw input lines.
        """
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        # Drop trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            del lines[-1]

        # Sentinel entry marking the end of the last statement.
        stats = self.stats
        stats.append((len(lines), 0))

        # Leading-space count seen -> leading-space count wanted.
        have2want = {}
        # The program after transformation.
        after = self.after = []

        # Leading blank lines are copied verbatim; nothing needs deciding
        # until a line with something on it shows up.
        first_stmt = stats[0][0]
        after.extend(lines[1:first_stmt])

        for pos, (thisstmt, thislevel) in enumerate(stats[:-1]):
            nextstmt = stats[pos + 1][0]
            have = getlspace(lines[thisstmt])
            if thislevel >= 0:
                # A real statement: tokenize told us its level.
                want = thislevel * 4
            elif have:
                # An indented comment line.
                want = self._comment_indent(pos, have, have2want)
            else:
                # A comment line already at column 0.
                want = 0
            assert want >= 0
            have2want[have] = want

            diff = want - have
            chunk = lines[thisstmt:nextstmt]
            if diff == 0 or have == 0:
                after.extend(chunk)
            elif diff > 0:
                # Shift right, leaving blank lines blank.
                pad = " " * diff
                after.extend(ln if ln == "\n" else pad + ln for ln in chunk)
            else:
                # Shift left, never removing more blanks than a line has.
                after.extend(ln[min(getlspace(ln), -diff):] for ln in chunk)
        return self.raw != self.after

    def _comment_indent(self, pos, have, have2want):
        """Choose the indentation for the indented comment at stats[pos].

        *have* is the comment's current (nonzero) leading-space count and
        *have2want* maps previously seen counts to what they became.
        """
        stats, lines, after = self.stats, self.lines, self.after

        # If the same indentation was seen before, reuse its latest mapping.
        want = have2want.get(have, -1)

        if want < 0:
            # Then it probably belongs to the next real stmt (the sentinel
            # at the end of stats is excluded from the search).
            for jline, jlevel in stats[pos + 1:-1]:
                if jlevel >= 0:
                    if have == getlspace(lines[jline]):
                        want = jlevel * 4
                    break

        if want < 0:
            # Maybe it is a hanging comment; shift it by the same amount
            # its nearest preceding real statement was shifted.
            for jline, jlevel in reversed(stats[:pos]):
                if jlevel >= 0:
                    shifted = getlspace(after[jline - 1])
                    original = getlspace(lines[jline])
                    want = have + shifted - original
                    break

        if want < 0:
            # Still no luck -- leave it alone.
            want = have
        return want

    def write(self, f):
        """Write the lines computed by run() to the file-like object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once the lines are exhausted."""
        if self.index < len(self.lines):
            line = self.lines[self.index]
            self.index += 1
            return line
        return ""

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts."""
        if type in (INDENT, DEDENT):
            self.find_stmt = 1
            self.level += 1 if type == INDENT else -1

        elif type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == COMMENT:
            # Record comment-only lines, but keep looking for the next
            # statement: find_stmt is deliberately left alone.
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))

        elif type != NL and self.find_stmt:
            # The first "real" token following a NEWLINE: it must start the
            # next program statement, or be the ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Tim Petersad147202000-10-05 03:48:38 +0000297
def getlspace(line):
    """Return the number of leading space characters in line.

    Only literal spaces count; a leading tab or any other character ends
    the run.
    """
    return len(line) - len(line.lstrip(" "))
304
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()