#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
-v (--verbose) Verbose. Print informative msgs; else no output.
-h (--help) Help. Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""
35
# Version string of this script.
__version__ = "1"
37
38import tokenize
39import os
40import sys
41
# Command-line option counters.  main() bumps each one (via ``global``) every
# time the matching flag is seen; check() only tests them for truth.
verbose = 0  # -v / --verbose: print progress messages
recurse = 0  # -r / --recurse: descend into subdirectories
dryrun = 0   # -d / --dryrun: analyze only, never rewrite files
45
def usage(msg=None):
    """Write the module usage text to stderr, preceded by *msg* if given.

    msg may be any object (main() passes the getopt.error instance it
    caught); it is converted to text with %s formatting before being written.
    When msg is None only the module docstring is written.
    """
    # sys.stderr.write() replaces the ``print >> sys.stderr, ...`` statement:
    # the chevron form only exists in Python 2, while write() produces the
    # same output on both Python 2 and Python 3.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
50
def errprint(*args):
    """Write the arguments to stderr on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.  With no
    arguments just the newline is written.
    """
    # str.join builds the whole line at once instead of tracking a separator
    # by hand and issuing one write per argument.
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
57
def main():
    """Command-line entry point: parse options, then filter or check paths.

    Options are read from sys.argv[1:].  An unrecognized option prints the
    getopt error plus the usage text (via usage()) and returns; -h/--help
    prints the usage text and returns.  With no path arguments the script
    acts as a filter from stdin to stdout; otherwise every path argument is
    handed to check().
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drvh",
                                   ["dryrun", "recurse", "verbose", "help"])
    # ``except X as name`` works on Python 2.6+ and Python 3; the older
    # ``except X, name`` spelling is a SyntaxError on Python 3.
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: the result is always written to stdout, whether or
        # not run() reports a change, and -d/-r/-v have no effect here.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
84
def check(file):
    """Reindent *file*, or every .py file found under it if it is a directory.

    A directory (that is not a symlink) is listed; each entry whose name
    ends with ".py" (case-insensitively) is checked, and non-symlink
    subdirectories are descended into only when the global ``recurse`` flag
    is set.

    A regular file is read and run through Reindenter.  If that reports a
    change and the global ``dryrun`` flag is not set, the original is renamed
    to ``file + ".bak"`` (replacing any existing backup) and the new text is
    written to ``file``.  Progress messages go to stdout when the global
    ``verbose`` flag is set.  A file that cannot be opened is reported on
    stderr and skipped.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") is printed
        # later on the same line.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; make sure the
    # handle is released even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()
    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print("renamed %s to %s" % (file, bak))
            f = open(file, "w")
            # Close the output file even if writing fails part-way.
            try:
                r.write(f)
            finally:
                f.close()
            if verbose:
                print("wrote new %s" % file)
    else:
        if verbose:
            print("unchanged.")
128
Tim Petersba001a02001-10-04 19:44:10 +0000129def _rstrip(line, JUNK='\n \t'):
130 """Return line stripped of trailing spaces, tabs, newlines.
131
132 Note that line.rstrip() instead also strips sundry control characters,
133 but at least one known Emacs user expects to keep junk like that, not
134 mentioning Barry by name or anything <wink>.
135 """
136
137 i = len(line)
138 while i > 0 and line[i-1] in JUNK:
139 i -= 1
140 return line[:i]
141
class Reindenter:
    """Re-indent the Python source read from one file object.

    Typical use (see main() and check()): construct with an open text file,
    call run(), then call write() with the destination file.  run() leaves
    the transformed lines in ``self.after`` and returns whether they differ
    from the raw input.
    """

    def __init__(self, f):
        """Read every line of *f* (via f.readlines()) and set up parse state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into ``self.after``.

        Returns true iff the result differs from the raw input lines.
        """
        # generate_tokens() exists in both Python 2 and Python 3, whereas
        # the tokenize.tokenize(readline, tokeneater) callback form is
        # Python 2 only.  Each token is a 5-tuple matching tokeneater's
        # positional parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is considered.
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank; don't pad them.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never strip more than the line's own leading
                        # blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines produced by run() to file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all lines are used."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and record statement/comment start lines.

        Arguments are the five fields of a tokenize token: type, token
        string, (start line, start column), end position and the physical
        line.  The trailing keyword defaults only cache tokenize constants.
        """
        # The start position is unpacked here rather than in the signature:
        # tuple parameters were removed in Python 3 (PEP 3113).
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
284
def getlspace(line):
    """Count number of leading blanks (space characters only) in *line*."""
    # lstrip(" ") removes only spaces, so tabs or other whitespace end the
    # count just as the character-by-character scan would.
    return len(line) - len(line.lstrip(" "))
291
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()