blob: 5b10474f8634dc6b7edc715345923617b23938f0 [file] [log] [blame]
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00001#! /usr/bin/env python
2
Raymond Hettingerd1fa3db2002-05-15 02:56:03 +00003"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000014
Guido van Rossumaa2a7a41998-06-09 19:02:21 +000015# Released to the public domain, by Tim Peters, 15 April 1998.
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000016
Guido van Rossumdc688332000-02-23 15:32:19 +000017# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like. This will be addressed later.
20
Guido van Rossuma74c5561999-07-30 17:48:20 +000021__version__ = "6"
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000022
23import os
24import sys
25import getopt
26import tokenize
# process_tokens() treats tokenize.NL tokens as junk to skip, so refuse to
# load against a tokenize module that does not define NL.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]

# Verbosity counter; main() adds one per -v option.  check() prints extra
# progress and diagnostic detail when it is non-zero (and more above 1).
verbose = 0
# Counter that main() bumps once per -q option.  When non-zero (and verbose
# is zero) check() prints only the name of each offending file.
filename_only = 0
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000034
def errprint(*args):
    """Write the arguments to standard error as one space-separated line.

    Each argument is converted with str().  The line is terminated with a
    newline; with no arguments only the newline is written.
    """
    # Build the complete line first so it reaches stderr in a single write
    # instead of one write per argument.
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
41
def main():
    """Command-line entry point: parse options, then check each argument.

    Options are read from sys.argv[1:]:
        -q  add one to the module-level filename_only counter
        -v  add one to the module-level verbose counter

    Every remaining argument is handed to check().  If the options cannot
    be parsed, or no file or directory is named, a message is written to
    standard error and the function returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    # Neither option takes a value, so only the option name matters.
    for opt, _value in opts:
        if opt == '-q':
            filename_only = filename_only + 1
        if opt == '-v':
            verbose = verbose + 1
    if not args:
        # List both supported options; -q is accepted by getopt above.
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
59
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().

    Carries the line number, a descriptive message and the text of the
    offending line.
    """

    def __init__(self, lineno, msg, line):
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number stored at construction."""
        return self.lineno

    def get_msg(self):
        """Return the message stored at construction."""
        return self.msg

    def get_line(self):
        """Return the source line text stored at construction."""
        return self.line
73
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output; failures to open or tokenize a file are
    reported through errprint().

    The amount of output is controlled by the module-level verbose and
    filename_only settings.  Nothing is returned.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%s: listing directory" % repr(file))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) subdirectories, and check
            # anything whose name ends in ".py".
            if ((os.path.isdir(fullname) and not os.path.islink(fullname))
                    or os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (repr(file), str(msg)))
        return

    # From here on the file is open: the finally clause guarantees it is
    # closed again on every exit path, including the early returns.
    try:
        if verbose > 1:
            print("checking %s ..." % repr(file))

        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%s: Token Error: %s" % (repr(file), str(msg)))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%s: *** Line %d: trouble in tab city! ***" % (
                    repr(file), badline))
                print("offending line: %s" % repr(line))
                print(nag.get_msg())
            else:
                # Quote names containing a space so the terse output
                # stays unambiguous.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print("%s %s %s" % (file, badline, repr(line)))
            return
    finally:
        f.close()

    if verbose:
        print("%s: Clean bill of health." % repr(file))
128
class Whitespace:
    """Normal form of the leading whitespace of a string.

    The constructor scans the leading run of spaces and tabs of its
    argument and records enough information to compute, for any tab size,
    the column at which that whitespace ends, and to compare two
    indentations across all tab sizes.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Analyse the leading spaces and tabs of the string ws."""
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: whitespace characters seen;
        # nt: tabs seen
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # grow count so that index b exists, then record one more
                # tab preceded by exactly b spaces
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the indentation
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    # return the column at which the leading whitespace ends when tabs
    # advance to the next multiple of tabsize (an integer >= 1)
    def indent_level(self, tabsize):
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # floor division is required: the result is a whole number of
            # tab stops, whatever division semantics "/" has
            il = il + i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                a.append((ts, mine, theirs))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                a.append((ts, mine, theirs))
        return a
262
def format_witnesses(w):
    """Return a phrase listing the tab sizes found in the witness list w.

    w is a sequence of tuples whose first item is a tab size, as returned
    by Whitespace.not_equal_witness() and Whitespace.not_less_witness().
    The result is "at tab size N" for a single witness and
    "at tab sizes N1, N2, ..." for several.
    """
    firsts = [str(tup[0]) for tup in w]
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)
270
def process_tokens(tokens):
    """Check a stream of tokens for ambiguous indentation.

    tokens is an iterable of 5-tuples (type, string, start, end, line),
    such as tokenize.generate_tokens() produces.  Raises NannyNag, carrying
    the line number, a message and the offending line, as soon as an indent
    is found that is not consistent across tab sizes.
    """
    newline_type = tokenize.NEWLINE
    indent_type = tokenize.INDENT
    dedent_type = tokenize.DEDENT
    junk_types = tokenize.COMMENT, tokenize.NL
    # stack of enclosing indentation levels, seeded with "no indentation"
    indent_stack = [Whitespace("")]
    expect_match = False

    for tok_type, tok_text, start, _end, line in tokens:
        if tok_type == newline_type:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting the flag is wrong, and will
            # be undone when we see the INDENT.
            expect_match = True
            continue

        if tok_type == indent_type:
            expect_match = False
            current = Whitespace(tok_text)
            enclosing = indent_stack[-1]
            # the new block must be deeper than its parent at every tab size
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indent_stack.append(current)
            continue

        if tok_type == dedent_type:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # No assertion that the flag is already set: if the last line
            # of the source is indented *and* lacks a newline, DEDENTs pop
            # out of thin air with no earlier NEWLINE.
            expect_match = True
            indent_stack.pop()
            continue

        if expect_match and tok_type not in junk_types:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # indent stack was seeded
            expect_match = False
            current = Whitespace(line)
            enclosing = indent_stack[-1]
            if not enclosing.equal(current):
                witness = enclosing.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
Guido van Rossumf4b44fa1998-04-06 14:41:20 +0000324
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000325
# Run the command-line interface only when executed as a script, so that
# importing the module (e.g. to call check()) has no side effects.
if __name__ == '__main__':
    main()