blob: c5adfb50085b10c712091c90262e846fd0da9910 [file] [log] [blame]
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00001#! /usr/bin/env python
2
Guido van Rossumf4b44fa1998-04-06 14:41:20 +00003"""The Tab Nanny despises ambiguous indentation. She knows no mercy."""
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00004
Guido van Rossumaa2a7a41998-06-09 19:02:21 +00005# Released to the public domain, by Tim Peters, 15 April 1998.
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00006
Guido van Rossuma74c5561999-07-30 17:48:20 +00007__version__ = "6"
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00008
9import os
10import sys
Guido van Rossuma74c5561999-07-30 17:48:20 +000011import string
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000012import getopt
13import tokenize
14
# Verbosity level; main() adds one for every -v on the command line.
verbose = 0
# Nonzero after -q: check() then prints only the names of offending files.
filename_only = 0
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000017
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated.

    A newline is always appended, so a call without arguments emits an
    empty line.
    """
    text = " ".join([str(piece) for piece in args])
    sys.stderr.write(text + "\n")
24
def main():
    """Command-line entry point: parse options, then check every argument.

    Options (each may be repeated; every occurrence adds one to the flag):
        -q  increment filename_only (report only names of offending files)
        -v  increment verbose

    An option-parsing error, or a command line without any file or
    directory argument, is reported through errprint() and the function
    returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    # Neither option takes a value, so the second member of each pair is
    # always empty and is ignored.
    for opt, _unused in opts:
        if opt == '-q':
            filename_only = filename_only + 1
        elif opt == '-v':
            verbose = verbose + 1
    if not args:
        # The usage line lists every option accepted by the getopt spec above.
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
42
class NannyNag(Exception):
    """Raised by tokeneater() on ambiguous indentation; handled in check().

    Deriving from Exception keeps the class raisable on interpreters that
    reject exceptions which are not exception subclasses.

    lineno -- line number at which the problem was detected
    msg    -- human-readable description of the problem
    line   -- text of the offending source line
    """

    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number of the offending line."""
        return self.lineno

    def get_msg(self):
        """Return the description of the problem."""
        return self.msg

    def get_line(self):
        """Return the text of the offending line."""
        return self.line
52
def check(file):
    """Check `file` for ambiguous indentation and report what was found.

    If `file` is a directory (and not a symbolic link), every entry that is
    itself such a directory, or whose name ends in ".py", is checked
    recursively.  Otherwise the file is opened and fed through tokenize
    with tokeneater() as the callback.

    Findings are printed to stdout; how much is printed depends on the
    module-level `verbose` and `filename_only` flags.  I/O and tokenizer
    errors are reported through errprint().  Always returns None.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%s: listing directory" % repr(file))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # `and` binds tighter than `or`: recurse into real directories,
            # or check anything whose name ends in ".py".
            if (os.path.isdir(fullname) and
                    not os.path.islink(fullname) or
                    os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (repr(file), str(msg)))
        return

    if verbose > 1:
        print("checking %s ..." % repr(file))

    reset_globals()
    try:
        try:
            # NOTE(review): the callback form tokenize.tokenize(readline,
            # tokeneater) is the Python 2 API -- confirm before relying on
            # this call under Python 3.
            tokenize.tokenize(f.readline, tokeneater)
        finally:
            # Close the file on every path; it used to be left open.
            f.close()

    except tokenize.TokenError as msg:
        errprint("%s: Token Error: %s" % (repr(file), str(msg)))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%s: *** Line %d: trouble in tab city! ***" % (
                repr(file), badline))
            print("offending line: %s" % repr(line))
            print(nag.get_msg())
        else:
            # Quote names containing a space so the output stays parseable.
            if ' ' in file:
                file = '"' + file + '"'
            if filename_only:
                print(file)
            else:
                print("%s %s %s" % (file, badline, repr(line)))
        return

    if verbose:
        print("%s: Clean bill of health." % repr(file))
99
class Whitespace:
    """The leading whitespace of a line, comparable across all tab sizes.

    Members:
        raw
            the original string
        n
            the number of leading whitespace characters in raw
        nt
            the number of tabs in raw[:n]
        norm
            the normal form as a pair (count, trailing), where:
            count
                a tuple such that raw[:n] contains count[i]
                instances of S * i + T
            trailing
                the number of trailing spaces in raw[:n]
            It's A Theorem that m.indent_level(t) ==
            n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
        is_simple
            true iff raw[:n] is of the form (T*)(S*)
    """

    # the characters used for space and tab
    S, T = ' \t'

    def __init__(self, ws):
        """Analyse the run of spaces and tabs at the start of `ws`."""
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: whitespace chars; nt: tabs
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                # grow count so that index b exists, then record this tab
                # as one preceded by exactly b spaces
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                # first non-whitespace character ends the indentation
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    def longest_run_of_spaces(self):
        """Return the length of the longest contiguous run of spaces
        (whether or not preceding a tab)."""
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the column reached by the indentation at `tabsize`."""
        # Straightforward form:
        #   count, il = self.norm
        #   for i in range(len(count)):
        #       if count[i]:
        #           il = il + (i//tabsize + 1)*tabsize * count[i]
        #   return il
        #
        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts
        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # Floor division is required: with true division the result
            # is a float and no longer a column number.
            il = il + i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    def equal(self, other):
        """Return true iff self.indent_level(t) == other.indent_level(t)
        for all t >= 1."""
        return self.norm == other.norm

    def not_equal_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) != other.indent_level(ts) == i2.

        Intended to be used after not self.equal(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 != i2:
                a.append((ts, i1, i2))
        return a

    def less(self, other):
        """Return true iff self.indent_level(t) < other.indent_level(t)
        for all t >= 1.

        The algorithm is due to Vincent Broman.  The bound n on the tab
        sizes to try is sharp (consider T vs ST).

        For the special (but common!) case where M and N are both of the
        form (T*)(S*), M.less(N) iff M.len() < N.len() and
        M.num_tabs() <= N.num_tabs().  Note that M is of the form (T*)(S*)
        iff len(M.norm[0]) <= 1.
        """
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    def not_less_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.

        Intended to be used after not self.less(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 >= i2:
                a.append((ts, i1, i2))
        return a
233
def format_witnesses(w):
    """Return a phrase naming the tab sizes found in the witness list `w`.

    `w` is a list of tuples whose first element is a tab size, as built by
    Whitespace.not_equal_witness() and Whitespace.not_less_witness().  The
    result reads "at tab size 8" for a single witness and
    "at tab sizes 1, 2" for several.
    """
    firsts = [str(tup[0]) for tup in w]
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    # str.join replaces string.join(), so no function-local import of the
    # string module is needed.
    return prefix + " " + ", ".join(firsts)
241
# The module globals, reset_globals(), and tokeneater() come in two
# flavours; which one gets defined depends on the version of tokenize
# in use.

def _require_deeper_indent(token, start, line):
    # INDENT handling shared by both tokeneater() flavours: the new
    # indentation must exceed the enclosing one at every tab size, else
    # nag; on success it becomes the new top of the indents stack.
    candidate = Whitespace(token)
    enclosing = indents[-1]
    if not enclosing.less(candidate):
        witness = enclosing.not_less_witness(candidate)
        msg = "indent not greater e.g. " + format_witnesses(witness)
        raise NannyNag(start[0], msg, line)
    indents.append(candidate)

def _require_same_indent(start, line):
    # First-token-of-statement handling shared by both flavours: the
    # leading whitespace of `line` must match the top of the indents
    # stack at every tab size, else nag.
    candidate = Whitespace(line)
    expected = indents[-1]
    if not expected.equal(candidate):
        witness = expected.not_equal_witness(candidate)
        msg = "indent not equal e.g. " + format_witnesses(witness)
        raise NannyNag(start[0], msg, line)

if hasattr(tokenize, 'NL'):
    # take advantage of Guido's patch!

    indents = []
    check_equal = 0

    def reset_globals():
        # Start a fresh file: nothing pending, stack seeded with the
        # empty indentation.
        global indents, check_equal
        indents = [Whitespace("")]
        check_equal = 0

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   JUNK=(tokenize.COMMENT, tokenize.NL) ):
        global indents, check_equal

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and
            # will be undone when we see the INDENT.
            check_equal = 1

        elif type == INDENT:
            check_equal = 0
            _require_deeper_indent(token, start, line)

        elif type == DEDENT:
            # Nothing to verify here.  What matters is that once the run
            # of DEDENTs ends, the indentation of the program statement
            # (or ENDMARKER) that triggered the run equals what is left
            # on top of the indents stack.
            #
            # Asserting check_equal at this point would be wrong: if the
            # last line of the source is indented *and* lacks a newline,
            # DEDENTs pop out of thin air without an earlier NEWLINE.
            check_equal = 1

            del indents[-1]

        elif check_equal and type not in JUNK:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or
            # an ENDMARKER; the "line" argument exposes the leading
            # whitespace for this statement.  For ENDMARKER, line is an
            # empty string, so it properly matches the empty string with
            # which the "indents" stack was seeded.
            check_equal = 0
            _require_same_indent(start, line)

else:
    # unpatched version of tokenize

    nesting_level = 0
    indents = []
    check_equal = 0

    def reset_globals():
        # Start a fresh file: no open brackets, nothing pending, stack
        # seeded with the empty indentation.
        global nesting_level, indents, check_equal
        nesting_level = 0
        check_equal = 0
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   OP=tokenize.OP):
        global nesting_level, indents, check_equal

        if type == INDENT:
            check_equal = 0
            _require_deeper_indent(token, start, line)

        elif type == DEDENT:
            del indents[-1]

        elif type == NEWLINE:
            # Only a NEWLINE outside every bracket arms the equality check.
            if nesting_level == 0:
                check_equal = 1

        elif type == COMMENT:
            pass

        elif check_equal:
            check_equal = 0
            _require_same_indent(start, line)

        # Bracket depth is tracked for every token, whichever branch
        # above handled it.
        if type == OP and token in ('{', '[', '('):
            nesting_level = nesting_level + 1

        elif type == OP and token in ('}', ']', ')'):
            if nesting_level == 0:
                raise NannyNag(start[0],
                               "unbalanced bracket '" + token + "'",
                               line)
            nesting_level = nesting_level - 1
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000365
# Run the nanny over the command-line arguments when executed as a script.
if __name__ == '__main__':
    main()
368