blob: 540901ba7d7092224629497cbd0681a32ffba5f1 [file] [log] [blame]
Tim Petersad147202000-10-05 03:48:38 +00001#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
"""reindent [-d][-r][-n][-v][-h] [ path ... ]
Tim Petersad147202000-10-05 03:48:38 +00006
Facundo Batistaf88a0772008-02-17 16:21:13 +00007-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose) Verbose. Print informative msgs; else no output.
11-h (--help) Help. Print this usage information and exit.
Tim Petersad147202000-10-05 03:48:38 +000012
13Change Python (.py) files to use 4-space indents and no hard tab characters.
Tim Petersba001a02001-10-04 19:44:10 +000014Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files. Also ensure the last line ends with a newline.
Tim Petersad147202000-10-05 03:48:38 +000016
If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r, -n and -v flags are
ignored.
Tim Petersad147202000-10-05 03:48:38 +000021
You can pass one or more file and/or directory paths.  For a directory
path, all .py files within the directory will be examined and, if the -r
option is given, so will those in its subdirectories, recursively.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension. If it finds nothing to
28change, the file is left alone. If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
Tim Petersad147202000-10-05 03:48:38 +000031
32The hard part of reindenting is figuring out what to do with comment
33lines. So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
Facundo Batistaf88a0772008-02-17 16:21:13 +000035
36The backup file is a copy of the one that is being reindented. The ".bak"
37file is generated with shutil.copy(), but some corner cases regarding
Jason R. Coombsfee77452011-07-27 14:05:37 -040038user/group and permissions could leave the backup file more readable than
Facundo Batistaf88a0772008-02-17 16:21:13 +000039you'd prefer. You can always use the --nobackup option to prevent this.
Tim Petersad147202000-10-05 03:48:38 +000040"""
41
42__version__ = "1"
43
44import tokenize
Facundo Batistaf88a0772008-02-17 16:21:13 +000045import os, shutil
Tim Petersad147202000-10-05 03:48:38 +000046import sys
Jason R. Coombsfee77452011-07-27 14:05:37 -040047import io
Tim Petersad147202000-10-05 03:48:38 +000048
# Option state: set from the command line by main(), read by check().
verbose = 0        # -v: nonzero enables progress messages on stdout
recurse = 0        # -r: nonzero makes check() descend into subdirectories
dryrun = 0         # -d: nonzero means analyze only, never rewrite files
makebackup = True  # cleared by -n; when true a ".bak" copy is made first
Tim Petersad147202000-10-05 03:48:38 +000053
def usage(msg=None):
    """Write the module usage text (__doc__) to standard error.

    msg, when not None, is written on its own line before the usage text;
    main() passes the getopt error here.
    """
    # sys.stderr.write() is used instead of the "print >> file" chevron
    # form, which only works as a statement on Python 2.  The text written
    # is the same: str() of the object followed by a newline.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
58
def errprint(*args):
    """Write args to stderr, separated by single spaces, then a newline.

    Each argument is converted with str().  With no arguments only the
    newline is written.
    """
    sys.stderr.write(" ".join(map(str, args)) + "\n")
65
def main():
    """Parse the command line, then reindent stdin or each given path.

    Updates the module-level verbose, recurse, dryrun and makebackup flags
    from the options.  With no path arguments the script acts as a filter:
    sys.stdin is reindented and the result written to sys.stdout.
    Otherwise check() is called on every path, in order.

    An unrecognized option prints the getopt error plus the usage text;
    -h/--help prints the usage text.  In both cases nothing is processed.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    # None of the options takes a value, so only the option name matters.
    for opt, _ in opts:
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-n', '--nobackup'):
            makebackup = False
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
94
def check(file):
    """Reindent one file, or the eligible entries of one directory.

    If file is a directory (and not a symlink), check() is called on every
    entry whose name ends in ".py" (case-insensitively) and, when the
    recurse flag is set, on every subdirectory that is not a symlink and
    whose name does not start with "."; the call then returns None.

    Otherwise the file is read in binary mode and reindented.  Unless the
    dryrun flag is set, a changed file is rewritten in place, after being
    copied to file + ".bak" when the makebackup flag is set.  Progress is
    printed to stdout only when the verbose flag is set.

    Returns True if reindenting changes the file (also on a dry run),
    False if it does not, and None if the file cannot be opened or uses
    more than one newline convention (both are reported on stderr).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline here: the verdict ("changed."/"unchanged.") is meant
        # to follow on the same line.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = open(file, "rb")
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The Reindenter slurps the whole file in its constructor; "with"
    # guarantees the handle is released even if that read fails.
    with f:
        r = Reindenter(f)

    newline = r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            # Binary mode so the newlines stored in the lines are written
            # exactly as they are.
            with open(file, "wb") as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
Tim Petersad147202000-10-05 03:48:38 +0000146
Serhiy Storchaka17cec702018-02-12 20:16:42 +0200147def _detect_newlines(lines):
148 newlines = {'\r\n' if line[-2:] == '\r\n' else
149 '\n' if line[-1:] == '\n' else
150 '\r' if line[-1:] == '\r' else
151 ''
152 for line in lines}
153 newlines.discard('')
154 newlines = tuple(sorted(newlines))
155 if not newlines:
156 return '\n'
157 if len(newlines) == 1:
158 return newlines[0]
159 return newlines
160
161def _rstrip(line, JUNK='\r\n \t'):
Tim Petersba001a02001-10-04 19:44:10 +0000162 """Return line stripped of trailing spaces, tabs, newlines.
163
164 Note that line.rstrip() instead also strips sundry control characters,
165 but at least one known Emacs user expects to keep junk like that, not
166 mentioning Barry by name or anything <wink>.
167 """
168
169 i = len(line)
170 while i > 0 and line[i-1] in JUNK:
171 i -= 1
172 return line[:i]
173
class Reindenter:
    """Reindent the source read from a file object to 4-space indents.

    The constructor reads every line from f.  run() computes the
    reindented program into self.after and reports whether it differs
    from the input; write() emits self.after.
    """

    def __init__(self, f):
        """Read all lines from f and prepare them for tokenizing."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = _detect_newlines(self.raw)
        if isinstance(self.newlines, tuple):
            self.newline = self.newlines[0]
        else:
            self.newline = self.newlines

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's newline.
        self.lines = [_rstrip(line).expandtabs() + self.newline
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after; return true iff it differs from the input."""
        # Feed every token to tokeneater().  generate_tokens() is the
        # generator form of the tokenizer and has the same signature on
        # Python 2 and 3, unlike the callback-style tokenize.tokenize().
        for token in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == self.newline:
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + getlspace(after[jline - 1]) -
                                        getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == self.newline:
                            # Never add indentation to an all-blank line.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Remove at most the leading spaces the line has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented lines (self.after) to file object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once the lines run out."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        slinecol is the token's (start line, start column) pair.  It is
        taken as a single argument and indexed here because tuple
        parameters in a def are not accepted by Python 3.
        """
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:  # not endmarker
                self.stats.append((sline, self.level))
324
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with.

    Only ' ' counts; a leading tab (or any other character) ends the run.
    """
    return len(line) - len(line.lstrip(" "))
331
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()