blob: df15edbc8d6502b7a6aedc94b8aff766c60d06d3 [file] [log] [blame]
#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose) Verbose. Print informative msgs; else no output.
-h (--help) Help. Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""
41
42__version__ = "1"
43
44import tokenize
Facundo Batistaf88a0772008-02-17 16:21:13 +000045import os, shutil
Tim Petersad147202000-10-05 03:48:38 +000046import sys
Jason R. Coombsfee77452011-07-27 14:05:37 -040047import io
Tim Petersad147202000-10-05 03:48:38 +000048
# Command-line option state: main() updates these, check() reads them.
verbose = 0  # incremented once for each -v/--verbose
recurse = 0  # incremented once for each -r/--recurse
dryrun = 0  # incremented once for each -d/--dryrun
makebackup = True  # set to False by -n/--nobackup
Tim Petersad147202000-10-05 03:48:38 +000053
def usage(msg=None):
    """Print an optional message, then the module docstring, to stderr.

    msg -- anything convertible with str() (main() passes the getopt
           error); skipped entirely when it is None.
    """
    # Write to sys.stderr directly (as errprint() does) instead of using
    # the Python-2-only "print >> stream" form; the bytes written are the
    # same: str(value) followed by a newline.
    if msg is not None:
        sys.stderr.write(str(msg) + "\n")
    sys.stderr.write(str(__doc__) + "\n")
58
def errprint(*args):
    """Write the str() of each argument to stderr, separated by single
    spaces and terminated by a newline."""
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces))
    sys.stderr.write("\n")
65
def main():
    """Command-line entry point.

    Parses sys.argv[1:] with getopt and records the options in the
    module-level flags (verbose, recurse, dryrun, makebackup).  On a bad
    option, or on -h/--help, the usage text is printed and nothing else
    is done.  With no path arguments the program acts as a filter from
    standard input to standard output; otherwise check() is called on
    each path in turn.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    # "except X as name" is accepted by Python 2.6+ as well as Python 3,
    # unlike the older comma form.
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout; the result of run() is not
        # consulted, the transformed source is always written.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
94
def check(file):
    """Reindent one file, or every candidate inside one directory.

    file -- path to a file or a directory.

    For a directory that is not a symlink, check() is applied to every
    entry whose name ends in ".py" (case-insensitively) and, when the
    module-level `recurse` flag is set, to every subdirectory that is
    neither a symlink nor has a name starting with "."; None is returned.

    For anything else the path is opened and reindented.  Returns True
    when the file needs changes (it is only rewritten when `dryrun` is
    off, after an optional ".bak" copy), False when it is already clean,
    and None when it cannot be opened or mixes newline conventions.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline here: the verdict ("changed." / "unchanged.") is
        # printed later on the same line.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = io.open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; the with-block
    # guarantees the handle is closed even if that read raises.
    with f:
        r = Reindenter(f)

    # f.newlines is a tuple when more than one line-ending style was seen.
    newline = r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            # Write back using the newline style that was read, so the
            # file's line endings are not altered.
            with io.open(file, "w", newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000146
Tim Petersba001a02001-10-04 19:44:10 +0000147def _rstrip(line, JUNK='\n \t'):
148 """Return line stripped of trailing spaces, tabs, newlines.
149
150 Note that line.rstrip() instead also strips sundry control characters,
151 but at least one known Emacs user expects to keep junk like that, not
152 mentioning Barry by name or anything <wink>.
153 """
154
155 i = len(line)
156 while i > 0 and line[i-1] in JUNK:
157 i -= 1
158 return line[:i]
159
class Reindenter:
    """Re-indent one Python source to 4 spaces per indent level.

    Typical use: construct from an open text file, call run(), and -- if
    it returned true -- call write() with the output file.
    """

    def __init__(self, f):
        """Read all lines from f and prepare the tokenizer state.

        f -- file-like object providing readlines() and a `newlines`
             attribute.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented program into self.after.

        Returns True when the result differs from the raw input lines,
        False otherwise.
        """
        # Feed every token to tokeneater().  generate_tokens() is used
        # rather than the callback form tokenize.tokenize(readline,
        # tokeneater) because only the generator form exists in both
        # Python 2 and Python 3.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] is 0-based while lines[] is
                                # 1-based, hence the jline-1.
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines (computed by run()) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and update self.level / self.stats.

        type, token, slinecol, end, line -- the five fields of a tokenize
        token; slinecol is the (row, column) pair where the token starts.
        The remaining parameters only bind the token-type constants as
        defaults and are not meant to be passed.
        """
        # Only the starting row is needed.  The pair is unpacked here
        # because tuple parameters in a "def" are not valid Python 3.
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
306
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with (tabs and other
    whitespace do not count)."""
    return len(line) - len(line.lstrip(" "))
313
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()