#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] path ...

-d Dry run. Analyze, but don't make any changes to, files.
-r Recurse. Search for all .py files in subdirectories too.
-v Verbose. Print informative msgs; else no output.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

Pass one or more file and/or directory paths. When a directory path, all
.py files within the directory will be examined, and, if the -r option is
given, likewise recursively for subdirectories.

Overwrites files in place, renaming the originals with a .bak extension.
If reindent finds nothing to change, the file is left alone. If reindent
does change a file, the changed file is a fixed-point for reindent (i.e.,
running reindent on the resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""

__version__ = "1"

import tokenize
import os
import sys

# Command-line option counters; main() increments them for -v, -r and -d.
verbose = 0
recurse = 0
dryrun = 0
38
def errprint(*args):
    """Write the arguments to stderr as one line.

    Each argument is converted with str(); the pieces are separated by
    single spaces and the line is terminated with a newline.  With no
    arguments, only the newline is written.
    """
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
45
def main():
    """Parse the command line and run check() on every path argument.

    Recognized options are -d (dry run), -r (recurse) and -v (verbose);
    each occurrence increments the matching module-level counter.  An
    option-parsing error is reported on stderr, and a missing path list
    prints the usage text (the module docstring); both cases return
    without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drv")
    except getopt.error as msg:
        errprint(msg)
        return
    for opt, _value in opts:
        if opt == '-d':
            dryrun += 1
        elif opt == '-r':
            recurse += 1
        elif opt == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", __doc__)
        return
    for arg in args:
        check(arg)
66
def check(file):
    """Reindent one file, or every .py file found in a directory.

    For a directory (that is not a symlink), each entry whose name ends
    in ".py" (case-insensitively) is checked; when the module-level
    `recurse` flag is set, non-symlink subdirectories are descended into
    as well.

    For a file, the content is run through Reindenter.  If that reports
    a change and this is not a dry run, the original is renamed to
    file + ".bak" (replacing any existing backup) and the new content is
    written under the original name.  Progress messages are printed only
    when the module-level `verbose` flag is set.  A failure to open the
    file for reading is reported on stderr and the file is skipped.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") follows on
        # the same line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; make sure the
    # handle is released even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()

    if not r.run():
        if verbose:
            print("unchanged.")
        return

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if dryrun:
        return

    bak = file + ".bak"
    if os.path.exists(bak):
        os.remove(bak)
    os.rename(file, bak)
    if verbose:
        print("renamed %s to %s" % (file, bak))
    with open(file, "w") as out:
        r.write(out)
    if verbose:
        print("wrote new %s" % (file,))
110
Tim Petersba001a02001-10-04 19:44:10 +0000111def _rstrip(line, JUNK='\n \t'):
112 """Return line stripped of trailing spaces, tabs, newlines.
113
114 Note that line.rstrip() instead also strips sundry control characters,
115 but at least one known Emacs user expects to keep junk like that, not
116 mentioning Barry by name or anything <wink>.
117 """
118
119 i = len(line)
120 while i > 0 and line[i-1] in JUNK:
121 i -= 1
122 return line[:i]
123
class Reindenter:
    """Compute a 4-space-indented version of a Python source file.

    The constructor reads all lines from the given file object.  run()
    tokenizes them, builds the reindented program in self.after and
    returns whether it differs from the raw input; write() emits the
    result to a file object.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after; return true iff it differs from the input."""
        # Feed every token to tokeneater; generate_tokens yields
        # 5-item tuples matching tokeneater's positional parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                want = self._comment_indent(i, have, have2want)
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def _comment_indent(self, i, have, have2want):
        """Return the indent wanted for the comment line at self.stats[i].

        `have` is the comment's current count of leading spaces and
        `have2want` maps previously seen indents to what they became.
        """
        if not have:
            return 0
        stats = self.stats
        lines = self.lines
        after = self.after
        # An indented comment line.  If we saw the same indentation
        # before, reuse what it most recently mapped to.
        want = have2want.get(have, -1)
        if want < 0:
            # Then it probably belongs to the next real stmt.
            for j in range(i + 1, len(stats) - 1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    if have == getlspace(lines[jline]):
                        want = jlevel * 4
                    break
        if want < 0:
            # Maybe it's a hanging comment, in which case we should
            # shift it like its base line got shifted.
            for j in range(i - 1, -1, -1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    want = (have + getlspace(after[jline - 1])
                            - getlspace(lines[jline]))
                    break
        if want < 0:
            # Still no luck -- leave it alone.
            want = have
        return want

    def write(self, f):
        """Write the transformed program (built by run()) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once they are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record statement and comment start lines in self.stats.

        `slinecol` is the token's (start line, start column) pair.
        """
        sline, _scol = slinecol

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:  # not endmarker
                self.stats.append((sline, self.level))
266
# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters in line.

    Only the space character counts; a leading tab or newline ends the
    run.
    """
    return len(line) - len(line.lstrip(" "))
273
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()