blob: 65749f0be53ab86b1fcbfa8d2940fd84062db2d2 [file] [log] [blame]
mbligh09a025e2008-06-06 20:29:49 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
40"""
41
42__version__ = "1"
43
44import tokenize
45import os, shutil
46import sys
47
# Option state shared by main() and check(); main() updates these from the
# command-line flags before any file is processed.
verbose = 0        # incremented once per -v / --verbose
recurse = 0        # incremented once per -r / --recurse
dryrun = 0         # incremented once per -d / --dryrun
makebackup = True  # set to False by -n / --nobackup
52
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- object shown (via str()) on its own line before the usage text;
           when None, only the usage text (the module docstring) is written.
    """
    # sys.stderr.write() replaces the "print >> sys.stderr, x" statement,
    # which is Python-2-only; the emitted text is the same.  The one-element
    # tuples keep "%" formatting safe if msg itself happens to be a tuple.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
57
def errprint(*args):
    """Write the str() of every argument to stderr on a single line.

    Arguments are separated by one space and the line is terminated with a
    newline; with no arguments only the newline is written.
    """
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces) + "\n")
64
def main():
    """Command-line entry point: parse options, then filter or check paths.

    Options are read from sys.argv[1:] and stored in the module-level
    verbose / recurse / dryrun / makebackup settings.  An option error prints
    the error plus the usage text and returns; -h/--help prints the usage
    text and returns.  With no path arguments the source is read from stdin
    and the reindented result written to stdout; otherwise check() is called
    on each path in order.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error:
        # sys.exc_info() instead of "except getopt.error, msg", which is
        # Python-2-only syntax; this spelling is valid on Python 2 and 3.
        usage(sys.exc_info()[1])
        return
    for opt, _value in opts:
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-n', '--nobackup'):
            makebackup = False
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
93
def check(file):
    """Reindent one file in place, or every matching file in a directory.

    file -- path of a file or directory.

    For a directory that is not a symlink, each entry whose name ends in
    ".py" (case-insensitively) is checked, and, when the module-level
    `recurse` setting is on, so is each non-symlink subdirectory.  For
    anything else the path is opened and reindented: unless `dryrun` is set,
    a changed file is overwritten, after first copying the original to
    file + ".bak" when `makebackup` is true.

    Returns True if the file needed changes, False if it did not, and None
    for a directory or when the file could not be opened (the I/O error is
    reported on stderr).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the "changed." / "unchanged." verdict completes the line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError:
        # sys.exc_info() rather than the Python-2-only "except IOError, msg".
        msg = sys.exc_info()[1]
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; close the handle
    # even if that read fails so it is never leaked.
    try:
        r = Reindenter(f)
    finally:
        f.close()

    if not r.run():
        if verbose:
            print("unchanged.")
        return False

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if not dryrun:
        if makebackup:
            bak = file + ".bak"
            shutil.copyfile(file, bak)
            if verbose:
                print("backed up %s to %s" % (file, bak))
        f = open(file, "w")
        try:
            r.write(f)
        finally:
            # Release the output handle even when the write fails.
            f.close()
        if verbose:
            print("wrote new %s" % (file,))
    return True
138
139def _rstrip(line, JUNK='\n \t'):
140 """Return line stripped of trailing spaces, tabs, newlines.
141
142 Note that line.rstrip() instead also strips sundry control characters,
143 but at least one known Emacs user expects to keep junk like that, not
144 mentioning Barry by name or anything <wink>.
145 """
146
147 i = len(line)
148 while i > 0 and line[i-1] in JUNK:
149 i -= 1
150 return line[:i]
151
class Reindenter:
    """Rewrite Python source so that each indent level is four spaces.

    Usage: construct with a file-like object, call run() to compute the
    transformed program, then write() to emit it.  The result of run() is
    kept in self.after as a list of lines.
    """

    def __init__(self, f):
        """Read all lines from f and set up the tokenizer bookkeeping.

        f -- object providing readlines(); it is read once, here, and not
             used again.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns True when the result differs from the raw input lines,
        False when the input was already in the desired form.  Errors
        raised by the tokenizer on malformed source propagate to the caller.
        """
        # Feed every token to tokeneater().  generate_tokens() is used
        # rather than the callback form tokenize.tokenize(readline, eater)
        # because the callback form exists only on Python 2; this loop is
        # what that call does internally and also works on Python 3.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # "after" has no dummy first entry, hence
                                # the jline-1 index.
                                want = (have + getlspace(after[jline - 1]) -
                                        getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Never add trailing blanks to an empty line.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Remove at most as many blanks as the line has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file-like object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized source line, or "" once exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record where each statement and comment line starts.

        Called once per token with the five fields produced by tokenize;
        start is the (row, column) pair of the token's first character.
        Appends (lineno, level) to self.stats for the first token of each
        statement and (lineno, -1) for each comment seen while a statement
        start is still being looked for.
        """
        # Unpacked here rather than in the signature: tuple parameters are
        # Python-2-only syntax.  Positional callers are unaffected.
        sline, _scol = start

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
294
295# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters begin line (tabs are not counted)."""
    without_lead = line.lstrip(" ")
    return len(line) - len(without_lead)
301
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()