blob: 11cb30a49951c2b16ae2c6e9d016facf0b91b89a [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
Skip Montanaro9a29e7a2002-03-26 11:39:26 +00005"""reindent [-d][-r][-v] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Christian Heimes7131fd92008-02-19 14:21:46 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000017If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output. In this case, the -d, -r and -v flags are
20ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
Skip Montanaro9a29e7a2002-03-26 11:39:26 +000022You can pass one or more file and/or directory paths. When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Christian Heimes7131fd92008-02-19 14:21:46 +000035
36The backup file is a copy of the one that is being reindented. The ".bak"
37file is generated with shutil.copy(), but some corner cases regarding
38user/group and permissions could leave the backup file more readable than
39you'd prefer. You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
# Version string of this script.
__version__ = "1"
43
44import tokenize
Christian Heimes7131fd92008-02-19 14:21:46 +000045import os, shutil
Tim Petersad147202000-10-05 03:48:38 +000046import sys
47
# Module-level option flags.  main() updates them from the command line
# and check() reads them.
verbose = 0        # truthy: print progress messages (-v)
recurse = 0        # truthy: descend into subdirectories (-r)
dryrun = 0         # truthy: analyze only, never rewrite files (-d)
makebackup = True  # False: do not write a ".bak" copy before rewriting (-n)
Tim Petersad147202000-10-05 03:48:38 +000052
def usage(msg=None):
    """Write the module docstring to stderr, preceded by *msg* if given.

    msg -- optional object to print on its own line before the usage text;
           omitted entirely when it is None.
    """
    to_show = [__doc__] if msg is None else [msg, __doc__]
    for item in to_show:
        print(item, file=sys.stderr)
Skip Montanaro165163f2004-03-27 18:43:56 +000057
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated.

    A single newline always terminates the output, even when no
    arguments are given.
    """
    text = " ".join(map(str, args))
    sys.stderr.write(text + "\n")
64
def main():
    """Command-line entry point.

    Parses sys.argv, updates the module-level option flags, and then
    either reindents each path given on the command line via check() or,
    when no paths are given, acts as a filter from stdin to stdout.
    An option-parsing error or -h/--help prints the usage text and
    returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    short_opts = "drnvh"
    long_opts = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.error as msg:
        usage(msg)
        return

    for flag, _value in opts:
        if flag in ('-h', '--help'):
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun += 1
        elif flag in ('-r', '--recurse'):
            recurse += 1
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose += 1

    if args:
        for path in args:
            check(path)
        return

    # No paths: filter mode, stdin -> stdout.
    filt = Reindenter(sys.stdin)
    filt.run()
    filt.write(sys.stdout)
93
def check(file):
    """Reindent *file* in place, or the .py files inside it if a directory.

    file -- path of a file or directory.

    For a directory that is not a symlink, every entry whose name ends in
    ".py" (case-insensitively) is checked; when the module flag `recurse`
    is set, non-symlink subdirectories are checked as well.

    For anything else, the path is read through a Reindenter.  If the
    Reindenter reports a change and `dryrun` is not set, the file is
    copied to file + ".bak" (unless `makebackup` is false) and then
    overwritten with the reindented text.  Progress messages are printed
    only when `verbose` is set.

    I/O errors while reading, and an invalid encoding declaration, are
    reported through errprint() and the file is skipped.  Errors raised
    by the Reindenter itself or while writing are not caught here.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # Honour the source file's own encoding (BOM or coding cookie,
        # UTF-8 otherwise) instead of the platform default, so that
        # non-ASCII sources round-trip unchanged.
        with open(file, "rb") as probe:
            encoding, _ = tokenize.detect_encoding(probe.readline)
        # The Reindenter reads everything in its constructor, so the
        # file can be closed as soon as it has been built; `with` also
        # closes it if reading fails part-way.
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as msg:
        # detect_encoding() raises SyntaxError for a bad coding cookie.
        errprint("%s: SyntaxError: %s" % (file, str(msg)))
        return
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Write back in the encoding the file was read with; the
            # handle is closed even if writing raises.
            with open(file, "w", encoding=encoding) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
    else:
        if verbose:
            print("unchanged.")
Tim Petersad147202000-10-05 03:48:38 +0000136
Tim Petersba001a02001-10-04 19:44:10 +0000137def _rstrip(line, JUNK='\n \t'):
138 """Return line stripped of trailing spaces, tabs, newlines.
139
140 Note that line.rstrip() instead also strips sundry control characters,
141 but at least one known Emacs user expects to keep junk like that, not
142 mentioning Barry by name or anything <wink>.
143 """
144
145 i = len(line)
146 while i > 0 and line[i-1] in JUNK:
147 i -= 1
148 return line[:i]
149
class Reindenter:
    """Rewrite the indentation of one Python source to 4 spaces per level.

    Usage: construct with an open text file (all of it is read at once),
    call run() to compute the transformed program, then write() it out.
    """

    def __init__(self, f):
        """Read every line of the text file object *f* and set up state."""
        self.find_stmt = True   # next token begins a fresh stmt?
        self.level = 0          # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.  Errors raised by the tokenizer propagate.
        """
        # Python 3's tokenize.tokenize() no longer accepts a token-eater
        # callback (and expects a bytes readline); generate_tokens() is
        # the str-based generator, so drive the eater from it directly.
        for token in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment (one trailing
                        # below its base line), in which case we should
                        # shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] is 0-based while line numbers
                                # are 1-based, hence jline-1.
                                want = (have + getlspace(after[jline - 1])
                                        - getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own
                        # leading spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        The first five parameters are the fields of a tokenize token
        (type, string, start (row, col), end, physical line); the
        remaining ones are token-type constants bound as defaults.
        Appends (lineno, level) to self.stats for the first token of
        each statement and (lineno, -1) for each comment line seen
        while waiting for a statement.
        """
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = True

        elif type == INDENT:
            self.find_stmt = True
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = True
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = False
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
Tim Petersad147202000-10-05 03:48:38 +0000292
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with (tabs don't count)."""
    without_lead = line.lstrip(" ")
    return len(line) - len(without_lead)
299
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()