blob: f68ed2da40f7d0255d31540cd6421765fae65a8d [file] [log] [blame]
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00001#! /usr/bin/env python
2
Guido van Rossumf4b44fa1998-04-06 14:41:20 +00003"""The Tab Nanny despises ambiguous indentation. She knows no mercy."""
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00004
Guido van Rossumaa2a7a41998-06-09 19:02:21 +00005# Released to the public domain, by Tim Peters, 15 April 1998.
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00006
Guido van Rossumaa2a7a41998-06-09 19:02:21 +00007__version__ = "5"
Guido van Rossum9ab75cb1998-03-31 14:31:39 +00008
9import os
10import sys
11import getopt
12import tokenize
13
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000014verbose = 0
Andrew M. Kuchlingdc86a4e1998-12-18 13:56:58 +000015filename_only = 0
Guido van Rossum9ab75cb1998-03-31 14:31:39 +000016
def errprint(*args):
    """Write the arguments to stderr as one line.

    Each argument is converted with str(); the pieces are separated by
    single spaces and the line is terminated with a newline.
    """
    pieces = map(str, args)
    sys.stderr.write(" ".join(pieces) + "\n")
23
def main():
    """Command-line entry point.

    Parses the options in sys.argv[1:] and runs check() on every remaining
    argument.  Recognized options:

        -q  bump the module-level filename_only counter
        -v  bump the module-level verbose counter (may be repeated)

    An option error, or an empty argument list, is reported through
    errprint() and the function returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        # advertise every flag that the getopt spec above accepts
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
41
class NannyNag(Exception):
    """Raised by the token callback when it finds ambiguous indentation.

    Carries the offending line number, an explanatory message, and the
    text of the offending line; check() catches it and reports them.
    Deriving from Exception keeps it raisable on interpreters that reject
    arbitrary classes as exceptions.
    """

    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number given at construction."""
        return self.lineno

    def get_msg(self):
        """Return the explanatory message given at construction."""
        return self.msg

    def get_line(self):
        """Return the source line text given at construction."""
        return self.line
51
def check(file):
    """Check one file, or every *.py file under a directory, for ambiguous
    indentation.

    If `file` is a directory (and not a symbolic link), each entry is
    visited: subdirectories that are not symbolic links are descended
    into, and entries whose last three characters normalize to ".py" are
    checked.

    Otherwise `file` is opened and tokenized with tokeneater() as the
    token callback.  Results are reported rather than returned:

      * I/O and tokenizer errors go to stderr via errprint();
      * an indentation problem (NannyNag) is printed to stdout -- in full
        when `verbose` is set, as just the file name when `filename_only`
        is set, otherwise as "file lineno repr(line)";
      * a clean file is mentioned only when `verbose` is set.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%s: listing directory" % repr(file))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((os.path.isdir(fullname) and
                 not os.path.islink(fullname)) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (repr(file), str(msg)))
        return

    if verbose > 1:
        print("checking %s ..." % repr(file))

    # The outer try/finally guarantees the file is closed on every exit
    # path, including the early returns in the handlers below.
    try:
        reset_globals()
        try:
            tokenize.tokenize(f.readline, tokeneater)

        except tokenize.TokenError as msg:
            errprint("%s: Token Error: %s" % (repr(file), str(msg)))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%s: *** Line %d: trouble in tab city! ***" % (
                    repr(file), badline))
                print("offending line: %s" % repr(line))
                print(nag.get_msg())
            elif filename_only:
                print(file)
            else:
                print("%s %s %s" % (file, badline, repr(line)))
            return
    finally:
        f.close()

    if verbose:
        print("%s: Clean bill of health." % repr(file))
97
class Whitespace:
    """The leading whitespace of a line, reduced to a tab-size-independent
    normal form so that two indentations can be compared for every
    possible tab size at once.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Analyze the leading run of spaces and tabs in the string ws.

        Scanning stops at the first character that is neither S nor T.
        """
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: whitespace chars; nt: tabs
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                # grow count so that index b exists, then record one more
                # "b spaces followed by a tab" group
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    def indent_level(self, tabsize):
        """Return the column this whitespace reaches for the given tabsize.

        tabsize must be an integer >= 1; the result is always an integer.
        """
        # straightforward version:
        #     count, il = self.norm
        #     for i in range(len(count)):
        #         if count[i]:
        #             il = il + (i//tabsize + 1)*tabsize * count[i]
        #     return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # must be floor division: true division would yield
            # fractional (wrong) indent levels
            il = il + i//tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 != i2:
                a.append((ts, i1, i2))
        return a

    # Return true iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return 0
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return 0
        return 1

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            i1 = self.indent_level(ts)
            i2 = other.indent_level(ts)
            if i1 >= i2:
                a.append((ts, i1, i2))
        return a
231
def format_witnesses(w):
    """Describe the tab sizes named by a list of witness tuples.

    w is a list of tuples whose first item is a tab size (as built by
    Whitespace.not_equal_witness / not_less_witness).  Returns
    "at tab size N" for a single witness and "at tab sizes N1, N2, ..."
    for several.
    """
    # str.join replaces string.join, which newer Pythons no longer provide
    firsts = [str(tup[0]) for tup in w]
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ", ".join(firsts)
239
Guido van Rossumf4b44fa1998-04-06 14:41:20 +0000240# The collection of globals, the reset_globals() function, and the
241# tokeneater() function, depend on which version of tokenize is
242# in use.
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000243
if hasattr(tokenize, 'NL'):
    # take advantage of Guido's patch!

    indents = []
    check_equal = 0

    def reset_globals():
        """Restore the checker state that tokeneater() needs for a new file."""
        global indents, check_equal
        check_equal = 0
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   JUNK=(tokenize.COMMENT, tokenize.NL)):
        """Token callback: track the indent stack and raise NannyNag when
        an indentation is not consistently greater than / equal to the
        one it is measured against.
        """
        global indents, check_equal

        if type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1
            return

        if type == INDENT:
            check_equal = 0
            deeper = Whitespace(token)
            enclosing = indents[-1]
            if not enclosing.less(deeper):
                witness = enclosing.not_less_witness(deeper)
                raise NannyNag(
                    start[0],
                    "indent not greater e.g. " + format_witnesses(witness),
                    line)
            indents.append(deeper)
            return

        if type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Asserting check_equal here is wrong: if the last line of the
            # source is indented *and* lacks a newline, DEDENTs pop out of
            # thin air with no earlier NEWLINE.  So just set it.
            check_equal = 1
            del indents[-1]
            return

        if type in JUNK or not check_equal:
            return

        # this is the first "real token" following a NEWLINE, so it
        # must be the first token of the next program statement, or an
        # ENDMARKER; the "line" argument exposes the leading whitespace
        # for this statement; in the case of ENDMARKER, line is an empty
        # string, so will properly match the empty string with which the
        # "indents" stack was seeded
        check_equal = 0
        actual = Whitespace(line)
        expected = indents[-1]
        if not expected.equal(actual):
            witness = expected.not_equal_witness(actual)
            raise NannyNag(
                start[0],
                "indent not equal e.g. " + format_witnesses(witness),
                line)

else:
    # unpatched version of tokenize

    nesting_level = 0
    indents = []
    check_equal = 0

    def reset_globals():
        """Restore the checker state that tokeneater() needs for a new file."""
        global nesting_level, indents, check_equal
        nesting_level = check_equal = 0
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   OP=tokenize.OP):
        """Token callback for a tokenize without NL tokens: as well as
        tracking the indent stack, it counts open brackets so that a
        NEWLINE only arms the equality check at nesting level zero.
        Raises NannyNag on bad indentation or an unbalanced closing
        bracket.
        """
        global nesting_level, indents, check_equal

        if type == INDENT:
            check_equal = 0
            deeper = Whitespace(token)
            enclosing = indents[-1]
            if not enclosing.less(deeper):
                witness = enclosing.not_less_witness(deeper)
                raise NannyNag(
                    start[0],
                    "indent not greater e.g. " + format_witnesses(witness),
                    line)
            indents.append(deeper)

        elif type == DEDENT:
            del indents[-1]

        elif type == NEWLINE:
            if not nesting_level:
                check_equal = 1

        elif type != COMMENT and check_equal:
            check_equal = 0
            actual = Whitespace(line)
            expected = indents[-1]
            if not expected.equal(actual):
                witness = expected.not_equal_witness(actual)
                raise NannyNag(
                    start[0],
                    "indent not equal e.g. " + format_witnesses(witness),
                    line)

        # bracket bookkeeping applies to every OP token, whatever the
        # branch taken above
        if type != OP:
            return
        if token in ('{', '[', '('):
            nesting_level += 1
        elif token in ('}', ']', ')'):
            if nesting_level == 0:
                raise NannyNag(start[0],
                               "unbalanced bracket '" + token + "'",
                               line)
            nesting_level -= 1
Guido van Rossum9ab75cb1998-03-31 14:31:39 +0000363
364if __name__ == '__main__':
365 main()
366