blob: 4b742a8c42abb82903e7e1d71cb6aacbb7698fa4 [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] path ...
6
7-d Dry run. Analyze, but don't make any changes to, files.
8-r Recurse. Search for all .py files in subdirectories too.
9-v Verbose. Print informative msgs; else no output.
10
11Change Python (.py) files to use 4-space indents and no hard tab characters.
12Also trim excess whitespace from ends of lines, and empty lines at the ends
13of files. Ensure the last line ends with a newline.
14
15Pass one or more file and/or directory paths. When a directory path, all
16.py files within the directory will be examined, and, if the -r option is
17given, likewise recursively for subdirectories.
18
19Overwrites files in place, renaming the originals with a .bak extension.
20If reindent finds nothing to change, the file is left alone. If reindent
21does change a file, the changed file is a fixed-point for reindent (i.e.,
22running reindent on the resulting .py file won't change it again).
23
24The hard part of reindenting is figuring out what to do with comment
25lines. So long as the input files get a clean bill of health from
26tabnanny.py, reindent should do a good job.
27"""
28
29__version__ = "1"
30
31import tokenize
32import os
33import sys
34
# Global option flags, bumped by command-line switches in main().
# Each is a count of how many times the switch appeared (truthiness is
# all that matters to the rest of the program).
verbose = 0   # -v: print informative messages while working
recurse = 0   # -r: descend into subdirectories of directory arguments
dryrun = 0    # -d: analyze files but never rewrite them
38
def errprint(*args):
    """Write all arguments to stderr, space-separated, ending in a newline."""
    text = " ".join(map(str, args))
    sys.stderr.write(text + "\n")
45
46def main():
47 import getopt
48 global verbose, recurse, dryrun
49 try:
50 opts, args = getopt.getopt(sys.argv[1:], "drv")
51 except getopt.error, msg:
52 errprint(msg)
53 return
54 for o, a in opts:
55 if o == '-d':
56 dryrun += 1
57 elif o == '-r':
58 recurse += 1
59 elif o == '-v':
60 verbose += 1
61 if not args:
62 errprint("Usage:", __doc__)
63 return
64 for arg in args:
65 check(arg)
66
67def check(file):
68 if os.path.isdir(file) and not os.path.islink(file):
69 if verbose:
70 print "listing directory", file
71 names = os.listdir(file)
72 for name in names:
73 fullname = os.path.join(file, name)
74 if ((recurse and os.path.isdir(fullname) and
75 not os.path.islink(fullname))
76 or name.lower().endswith(".py")):
77 check(fullname)
78 return
79
80 if verbose:
81 print "checking", file, "...",
82 try:
83 f = open(file)
84 except IOError, msg:
85 errprint("%s: I/O Error: %s" % (file, str(msg)))
86 return
87
88 r = Reindenter(f)
89 f.close()
90 if r.run():
91 if verbose:
92 print "changed."
93 if dryrun:
94 print "But this is a dry run, so leaving it alone."
95 if not dryrun:
96 bak = file + ".bak"
97 if os.path.exists(bak):
98 os.remove(bak)
99 os.rename(file, bak)
100 if verbose:
101 print "renamed", file, "to", bak
102 f = open(file, "w")
103 r.write(f)
104 f.close()
105 if verbose:
106 print "wrote new", file
107 else:
108 if verbose:
109 print "unchanged."
110
class Reindenter:
    """Compute a 4-space-indent version of a Python source file.

    Usage: construct with an open file (the whole file is read in the
    constructor), call run() to tokenize and compute the transformed
    text (returns true iff the text changed), then write(f) to emit
    the result.  Lines are also rstripped, tabs expanded, and trailing
    blank lines dropped; comment lines are re-anchored heuristically
    since tokenize reports no indent level for them.
    """

    def __init__(self, f):
        # Tokenizer bookkeeping used by tokeneater().
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines, exactly as read (used for the changed/unchanged
        # comparison in run()).
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [line.rstrip().expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Tokenize the input and build the reindented text in self.after.

        Returns true iff the transformed text differs from the raw input.
        """
        # tokenize drives tokeneater(), which fills self.stats with
        # (lineno, indentlevel) pairs for every stmt and comment line.
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    # An unindented comment line stays unindented.
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift (or nothing shiftable): copy as-is.
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every line of the stmt by diff spaces, but never
                # touch all-blank lines, and never strip below column 0.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed text (from run()) to open file f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        # Returns "" at EOF, which is how tokenize detects the end.
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        # Records (start line, indent level) for each stmt, and
        # (start line, -1) for each comment line, into self.stats.

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
249
def getlspace(line):
    """Return the number of leading space characters in line.

    Only actual space characters count; a leading tab stops the count.
    """
    return len(line) - len(line.lstrip(" "))
256
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()