#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d  Dry run. Analyze, but don't make any changes to, files.
-r  Recurse. Search for all .py files in subdirectories too.
-v  Verbose. Print informative msgs; else no output.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""
34
__version__ = "1"

import tokenize
import os
import sys

# Option counters.  main() increments one per matching command-line flag;
# check() reads them to decide what to print and whether to modify files.
verbose = 0   # -v: print progress messages
recurse = 0   # -r: descend into subdirectories
dryrun = 0    # -d: analyze only, never rewrite a file
44
def errprint(*args):
    """Write the arguments to stderr, separated by spaces, then a newline.

    Each argument is converted with str() before being written.  Calling
    with no arguments emits just the newline.
    """
    emit = sys.stderr.write
    for position, item in enumerate(args):
        # Every argument except the first is preceded by a single space.
        prefix = " " if position else ""
        emit(prefix + str(item))
    emit("\n")
51
def main():
    """Command-line entry point: parse the flags, then filter or check paths.

    The recognized flags are -d (dry run), -r (recurse) and -v (verbose);
    every occurrence increments the matching module-level counter.

    With no path arguments the program acts as a filter: standard input is
    reindented and the result is written to standard output.  Otherwise
    each path argument is passed to check().

    An unrecognized option is reported on stderr and nothing else is done.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drv")
    except getopt.error as msg:
        # "except E as name" parses on Python 2.6+ and on Python 3; the
        # older "except E, name" spelling is a syntax error on Python 3.
        errprint(msg)
        return
    for o, a in opts:
        if o == '-d':
            dryrun += 1
        elif o == '-r':
            recurse += 1
        elif o == '-v':
            verbose += 1
    if not args:
        # Filter mode: stdin -> stdout, no files are touched.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
74
def check(file):
    """Reindent one file, or the Python files found in a directory.

    If file is a directory (and not a symbolic link), each entry whose name
    ends in ".py" (compared case-insensitively) is checked; when the
    module-level recurse flag is set, non-symlink subdirectories are
    descended into as well.

    Otherwise file is opened and run through a Reindenter.  If the result
    differs from the input and this is not a dry run, the original is
    renamed to file + ".bak" (replacing any existing backup) and the new
    text is written under the original name.

    Progress messages go to stdout when the module-level verbose flag is
    set.  A failure to open the file is reported on stderr via errprint()
    and the file is skipped.
    """
    # sys.stdout.write behaves the same on Python 2 and 3, whereas the
    # print statement does not parse on Python 3.
    say = sys.stdout.write

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            say("listing directory %s\n" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline here: the verdict printed below ("changed." or
        # "unchanged.") completes this line.
        say("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The constructor reads every line, so the file can be closed as soon
    # as it returns; "with" guarantees the close even if reading raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            say("changed.\n")
            if dryrun:
                say("But this is a dry run, so leaving it alone.\n")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                say("renamed %s to %s\n" % (file, bak))
            # Close the output even if writing fails part-way through.
            with open(file, "w") as out:
                r.write(out)
            if verbose:
                say("wrote new %s\n" % (file,))
    else:
        if verbose:
            say("unchanged.\n")
118
Tim Petersba001a02001-10-04 19:44:10 +0000119def _rstrip(line, JUNK='\n \t'):
120 """Return line stripped of trailing spaces, tabs, newlines.
121
122 Note that line.rstrip() instead also strips sundry control characters,
123 but at least one known Emacs user expects to keep junk like that, not
124 mentioning Barry by name or anything <wink>.
125 """
126
127 i = len(line)
128 while i > 0 and line[i-1] in JUNK:
129 i -= 1
130 return line[:i]
131
class Reindenter:
    """Recompute the indentation of one Python source using 4-space levels.

    Typical use: construct from an open file-like object, call run() to
    build the transformed program, then write() to emit it.

    Attributes set by __init__:
        raw    -- the input lines exactly as read.
        lines  -- the input lines with trailing blanks removed and tabs
                  expanded, preceded by a dummy None entry.
        stats  -- (lineno, indentlevel) pairs gathered while tokenizing.
    Attribute set by run():
        after  -- the lines of the transformed program.
    """

    def __init__(self, f):
        """Read all lines from f and prepare them for tokenizing."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in self.after.

        Returns a true value if the transformed lines differ from the raw
        input lines, false otherwise.
        """
        # generate_tokens() is available on Python 2 and Python 3 alike,
        # unlike the callback form tokenize.tokenize(readline, tokeneater).
        # Each token is a 5-tuple matching tokeneater's positional
        # parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after holds output lines 0-based, so the
                                # output for input line jline is at jline-1.
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift the statement and its continuation lines together.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own leading
                        # blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines (built by run()) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once they are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, recording statement and comment start lines.

        slinecol is the token's (start line, start column) pair.  It is a
        plain parameter rather than a tuple parameter because tuple
        parameters do not parse on Python 3.
        """
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
274
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with.

    Only the space character counts; a leading tab ends the run.
    """
    return len(line) - len(line.lstrip(" "))
281
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()