blob: 7973f26f98b8b245de3569580ca57e11736be7e9 [file] [log] [blame]
#! /usr/bin/env python3

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000014
# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.

20
__version__ = "6"

import os
import sys
import tokenize
# Refuse to load against a tokenize module that lacks the NL token type;
# process_tokens() reads tokenize.NL to recognise tokens it must skip.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Public names exported by a star-import of this module.
__all__ = ["check", "NannyNag", "process_tokens"]

# Verbosity counter: main() adds one per -v option.  check() prints extra
# progress and diagnostic text when it is non-zero (and more when it is > 1).
verbose = 0
# Counter set by main(), which adds one per -q option.  When it is non-zero
# and verbose is zero, check() prints only the name of an offending file.
filename_only = 0
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000033
def errprint(*args):
    """Write the arguments to standard error on one line.

    Every argument is converted with str() and the results are joined by a
    single space; a newline terminates the line.  Called without arguments,
    only the newline is written.
    """
    # Build the whole line first so it reaches stderr in a single write
    # instead of one write per argument.
    sys.stderr.write(" ".join(map(str, args)) + "\n")
40
def main():
    """Command-line entry point: parse the options, then check each argument.

    Options (each may be given more than once):
      -q  add one to the module-level filename_only counter
      -v  add one to the module-level verbose counter

    An option-parsing error, or the absence of any file/directory argument,
    is reported through errprint() and the function returns.  Otherwise
    check() is called on every remaining argument, in order.
    """
    import getopt

    global verbose, filename_only
    try:
        options, targets = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as err:
        errprint(err)
        return
    for flag, _value in options:
        if flag == '-q':
            filename_only += 1
        elif flag == '-v':
            verbose += 1
    if not targets:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for target in targets:
        check(target)
60
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """

    def __init__(self, lineno, msg, line):
        # lineno: line number at which the problem was detected
        # msg:    text describing the problem
        # line:   the source line involved
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number passed to the constructor."""
        return self.lineno

    def get_msg(self):
        """Return the message passed to the constructor."""
        return self.msg

    def get_line(self):
        """Return the source line passed to the constructor."""
        return self.line
74
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print() function; files that cannot
    be opened, decoded or tokenized are reported on standard error through
    errprint() and then skipped.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real sub-directories (never through symlinks)
            # and into anything whose name ends in ".py".
            is_subdir = (os.path.isdir(fullname) and
                         not os.path.islink(fullname))
            if is_subdir or os.path.normcase(name[-3:]) == ".py":
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return
    except SyntaxError as msg:
        # tokenize.open() detects the source encoding first and raises
        # SyntaxError for an invalid or conflicting coding declaration.
        # Report it like any other unreadable file instead of letting one
        # bad file abort a whole directory walk.
        errprint("%r: Token Error: %s" % (file, msg))
        return

    # The with block closes the file on every exit path below, including
    # the early returns.
    with f:
        if verbose > 1:
            print("checking %r ..." % file)

        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except SyntaxError as msg:
            # Must come after IndentationError (its subclass) so that the
            # more specific message above is still used for indentation
            # problems.
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Terse output: quote names containing a space.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000135
class Whitespace:
    """Summary of the leading run of spaces and tabs of a string.

    Instances can report their indent level for any tab size and can be
    compared with one another across all tab sizes (see equal() and less()).
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        tab_counts = []     # tab_counts[i]: tabs preceded by exactly i spaces
        run = 0             # spaces seen since the last tab (or the start)
        total = tabs = 0
        for ch in ws:
            if ch == space:
                run += 1
            elif ch == tab:
                tabs += 1
                # grow the table so that index `run` exists
                shortfall = run + 1 - len(tab_counts)
                if shortfall > 0:
                    tab_counts.extend([0] * shortfall)
                tab_counts[run] += 1
                run = 0
            else:
                # first non-whitespace character ends the scan
                break
            total += 1
        self.n = total
        self.nt = tabs
        self.norm = tuple(tab_counts), run
        self.is_simple = len(tab_counts) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        tab_counts, trailing = self.norm
        longest_before_tab = len(tab_counts) - 1
        return max(longest_before_tab, trailing)

    def indent_level(self, tabsize):
        # Straightforward definition:
        #   count, il = self.norm
        #   for i in range(len(count)):
        #       if count[i]:
        #           il = il + (i//tabsize + 1)*tabsize * count[i]
        #   return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        tab_counts, trailing = self.norm
        extra = sum(i // tabsize * tab_counts[i]
                    for i in range(tabsize, len(tab_counts)))
        return trailing + tabsize * (extra + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, limit + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, limit + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, limit + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses
269
def format_witnesses(w):
    """Return a phrase listing the tab sizes found in the witness tuples *w*.

    Only the first element of each tuple is used.  The phrase starts with
    "at tab size" (pluralised when there is more than one witness) followed
    by the sizes separated by ", ".
    """
    label = "at tab sizes" if len(w) > 1 else "at tab size"
    sizes = [str(entry[0]) for entry in w]
    return label + " " + ', '.join(sizes)
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000276
def process_tokens(tokens):
    """Scan a token stream and raise NannyNag at the first ambiguous indent.

    *tokens* is an iterable of 5-tuples (type, string, start, end, line),
    the shape produced by tokenize.generate_tokens().  A stack of Whitespace
    objects tracks the open indentation levels: each INDENT must be strictly
    greater than the level on top of the stack, and the first statement after
    a NEWLINE or a run of DEDENTs must match the level on top of the stack.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indent_stack = [Whitespace("")]
    expect_equal = False

    for tok_type, tok_text, start, _end, line in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting expect_equal is wrong, and will
            # be undone when we see the INDENT.
            expect_equal = True

        elif tok_type == INDENT:
            expect_equal = False
            current = Whitespace(tok_text)
            enclosing = indent_stack[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indent_stack.append(current)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  An assertion that expect_equal is already set would
            # trigger if the last line of the source is indented *and* lacks
            # a newline -- then DEDENTs pop out of thin air.  So just set it.
            expect_equal = True
            indent_stack.pop()

        elif expect_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # indent stack was seeded
            expect_equal = False
            current = Whitespace(line)
            enclosing = indent_stack[-1]
            if not enclosing.equal(current):
                witness = enclosing.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
Guido van Rossumf4b44fa1998-04-06 14:41:20 +0000330
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000331
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()