blob: e646aed9c3ce08238bfc963128253f88f0cf88cc [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] path ...
6
7-d Dry run. Analyze, but don't make any changes to, files.
8-r Recurse. Search for all .py files in subdirectories too.
9-v Verbose. Print informative msgs; else no output.
10
11Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000012Also trim excess spaces and tabs from ends of lines, and remove empty lines
13at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000014
15Pass one or more file and/or directory paths. When a directory path, all
16.py files within the directory will be examined, and, if the -r option is
17given, likewise recursively for subdirectories.
18
19Overwrites files in place, renaming the originals with a .bak extension.
20If reindent finds nothing to change, the file is left alone. If reindent
21does change a file, the changed file is a fixed-point for reindent (i.e.,
22running reindent on the resulting .py file won't change it again).
23
24The hard part of reindenting is figuring out what to do with comment
25lines. So long as the input files get a clean bill of health from
26tabnanny.py, reindent should do a good job.
27"""
28
__version__ = "1"

import tokenize
import os
import sys

# Command-line option flags; each is bumped in main() when the matching
# switch is seen (-v / -r / -d).  Zero means "off".
verbose = 0   # -v: print progress messages (otherwise silent)
recurse = 0   # -r: descend into subdirectories
dryrun = 0    # -d: analyze only, never rewrite files
38
def errprint(*args):
    """Write args to stderr, space-separated, followed by a newline."""
    sys.stderr.write(" ".join(str(arg) for arg in args))
    sys.stderr.write("\n")
45
def main():
    """Parse command-line options and reindent each path argument.

    The -d/-r/-v switches bump the corresponding module-level counters
    (dryrun/recurse/verbose); every remaining argument is handed to
    check().  Usage errors are reported via errprint() and cause an
    early return rather than an exception.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drv")
    # Python 2-only "except getopt.error, msg" fixed to the modern
    # "as" form, which is required by Python 3.
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-d':
            dryrun += 1
        elif o == '-r':
            recurse += 1
        elif o == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", __doc__)
        return
    for arg in args:
        check(arg)
66
def check(file):
    """Reindent the .py file at path `file`, or scan a directory.

    For a directory (symlinks excluded), examine every .py file inside
    it, recursing into subdirectories when the -r flag was given.  For a
    file, run Reindenter over it; if anything changed, rename the
    original to file + ".bak" and rewrite `file` in place (unless -d
    dry-run is in effect).  Messages are printed only under -v.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # end=' ' preserves the old trailing-comma print behavior:
        # "checked ..." and the verdict share one output line.
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    # Python 2-only "except IOError, msg" fixed to the "as" form.
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter slurps the whole file in its constructor; the context
    # manager guarantees the handle is closed even if that raises.
    with f:
        r = Reindenter(f)
    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print("renamed", file, "to", bak)
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
    else:
        if verbose:
            print("unchanged.")
110
Tim Petersba001a02001-10-04 19:44:10 +0000111def _rstrip(line, JUNK='\n \t'):
112 """Return line stripped of trailing spaces, tabs, newlines.
113
114 Note that line.rstrip() instead also strips sundry control characters,
115 but at least one known Emacs user expects to keep junk like that, not
116 mentioning Barry by name or anything <wink>.
117 """
118
119 i = len(line)
120 while i > 0 and line[i-1] in JUNK:
121 i -= 1
122 return line[:i]
123
class Reindenter:
    """Reindent a Python source file to 4-space indentation levels.

    Usage:
        r = Reindenter(f)   # slurps the open file f
        if r.run():         # true iff a change was needed
            r.write(out)    # emit the reindented program
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in self.after.

        Returns true iff the result differs from the original lines.
        """
        # The Python 2 callback form tokenize.tokenize(readline,
        # tokeneater) no longer exists; iterate generate_tokens and
        # feed each 5-tuple to tokeneater instead.
        for _token in tokenize.generate_tokens(self.getline):
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:  # Maybe it's a hanging
                                  # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented program to open file f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.  The Python 2 tuple parameter
    # "(sline, scol)" is a syntax error in Python 3, so the start
    # position arrives as the single tuple slinecol.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
262
def getlspace(line):
    """Return the number of leading space characters in line."""
    width = 0
    for ch in line:
        if ch != " ":
            break
        width += 1
    return width
269
# Script entry point: reindent the paths named on the command line.
if __name__ == '__main__':
    main()