Blame - Lib/tabnanny.py - platform/external/python/cpython3

blob: 9af8ecb847d454782c6e6aed76d1ea01eeab7dae [file] [log] [blame]

#! /usr/bin/env python

"""The Tab Nanny despises ambiguous indentation. She knows no mercy."""

# Released to the public domain, by Tim Peters, 4 April 1998.

# Version of this module, kept as a string.
__version__ = "3"

import os
import sys
import getopt
import tokenize

# Verbosity level.  main() adds one for every -v option on the command
# line; check() prints progressively more detail as the value grows.
verbose = 0
				15
				16	def main():
				17	global verbose
				18	try:
				19	opts, args = getopt.getopt(sys.argv[1:], "v")
				20	except getopt.error, msg:
				21	print msg
				22	for o, a in opts:
				23	if o == '-v':
				24	verbose = verbose + 1
				25	for arg in args:
				26	check(arg)
				27
				28	class NannyNag:
				29	def __init__(self, lineno, msg, line):
				30	self.lineno, self.msg, self.line = lineno, msg, line
				31	def get_lineno(self):
				32	return self.lineno
				33	def get_msg(self):
				34	return self.msg
				35	def get_line(self):
				36	return self.line
				37
				38	def check(file):
				39	if os.path.isdir(file) and not os.path.islink(file):
				40	if verbose:
				41	print "%s: listing directory" % `file`
				42	names = os.listdir(file)
				43	for name in names:
				44	fullname = os.path.join(file, name)
				45	if (os.path.isdir(fullname) and
				46	not os.path.islink(fullname) or
				47	os.path.normcase(name[-3:]) == ".py"):
				48	check(fullname)
				49	return
				50
				51	try:
				52	f = open(file)
				53	except IOError, msg:
				54	print "%s: I/O Error: %s" % (`file`, str(msg))
				55	return
				56
				57	if verbose > 1:
				58	print "checking", `file`, "..."
				59
				60	reset_globals()
				61	try:
				62	tokenize.tokenize(f.readline, tokeneater)
				63
				64	except tokenize.TokenError, msg:
				65	print "%s: Token Error: %s" % (`fname`, str(msg))
				66	return
				67
				68	except NannyNag, nag:
				69	badline = nag.get_lineno()
				70	line = nag.get_line()
				71	if verbose:
				72	print "%s: * Line %d: trouble in tab city! *" % (
				73	`file`, badline)
				74	print "offending line:", `line`
				75	print nag.get_msg()
				76	else:
				77	print file, badline, `line`
				78	return
				79
				80	if verbose:
				81	print "%s: Clean bill of health." % `file`
				82
				83	class Whitespace:
				84	# the characters used for space and tab
				85	S, T = ' \t'
				86
				87	# members:
				88	# raw
				89	# the original string
				90	# n
				91	# the number of leading whitespace characters in raw
				92	# nt
				93	# the number of tabs in raw[:n]
				94	# norm
				95	# the normal form as a pair (count, trailing), where:
				96	# count
				97	# a tuple such that raw[:n] contains count[i]
				98	# instances of S * i + T
				99	# trailing
				100	# the number of trailing spaces in raw[:n]
				101	# It's A Theorem that m.indent_level(t) ==
				102	# n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
				103	# is_simple
				104	# true iff raw[:n] is of the form (T)(S)
				105
				106	def __init__(self, ws):
				107	self.raw = ws
				108	S, T = Whitespace.S, Whitespace.T
				109	count = []
				110	b = n = nt = 0
				111	for ch in self.raw:
				112	if ch == S:
				113	n = n + 1
				114	b = b + 1
				115	elif ch == T:
				116	n = n + 1
				117	nt = nt + 1
				118	if b >= len(count):
				119	count = count + [0] * (b - len(count) + 1)
				120	count[b] = count[b] + 1
				121	b = 0
				122	else:
				123	break
				124	self.n = n
				125	self.nt = nt
				126	self.norm = tuple(count), b
				127	self.is_simple = len(count) <= 1
				128
				129	# return length of longest contiguous run of spaces (whether or not
				130	# preceding a tab)
				131	def longest_run_of_spaces(self):
				132	count, trailing = self.norm
				133	return max(len(count)-1, trailing)
				134
				135	def indent_level(self, tabsize):
				136	# count, il = self.norm
				137	# for i in range(len(count)):
				138	# if count[i]:
				139	# il = il + (i/tabsize + 1)tabsize count[i]
				140	# return il
				141
				142	# quicker:
				143	# il = trailing + sum (i/ts + 1)tscount[i] =
				144	# trailing + ts * sum (i/ts + 1)*count[i] =
				145	# trailing + ts * sum i/ts*count[i] + count[i] =
				146	# trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
				147	# trailing + ts * [(sum i/ts*count[i]) + num_tabs]
				148	# and note that i/ts*count[i] is 0 when i < ts
				149
				150	count, trailing = self.norm
				151	il = 0
				152	for i in range(tabsize, len(count)):
				153	il = il + i/tabsize * count[i]
				154	return trailing + tabsize * (il + self.nt)
				155
				156	# return true iff self.indent_level(t) == other.indent_level(t)
				157	# for all t >= 1
				158	def equal(self, other):
				159	return self.norm == other.norm
				160
				161	# return a list of tuples (ts, i1, i2) such that
				162	# i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
				163	# Intended to be used after not self.equal(other) is known, in which
				164	# case it will return at least one witnessing tab size.
				165	def not_equal_witness(self, other):
				166	n = max(self.longest_run_of_spaces(),
				167	other.longest_run_of_spaces()) + 1
				168	a = []
				169	for ts in range(1, n+1):
				170	if self.indent_level(ts) != other.indent_level(ts):
				171	a.append( (ts,
				172	self.indent_level(ts),
				173	other.indent_level(ts)) )
				174	return a
				175
				176	# Return true iff self.indent_level(t) < other.indent_level(t)
				177	# for all t >= 1.
				178	# The algorithm is due to Vincent Broman.
				179	# Easy to prove it's correct.
				180	# XXXpost that.
				181	# Trivial to prove n is sharp (consider T vs ST).
				182	# Unknown whether there's a faster general way. I suspected so at
				183	# first, but no longer.
				184	# For the special (but common!) case where M and N are both of the
				185	# form (T)(S), M.less(N) iff M.len() < N.len() and
				186	# M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
				187	# XXXwrite that up.
				188	# Note that M is of the form (T)(S) iff len(M.norm[0]) <= 1.
				189	def less(self, other):
				190	if self.n >= other.n:
				191	return 0
				192	if self.is_simple and other.is_simple:
				193	return self.nt <= other.nt
				194	n = max(self.longest_run_of_spaces(),
				195	other.longest_run_of_spaces()) + 1
				196	# the self.n >= other.n test already did it for ts=1
				197	for ts in range(2, n+1):
				198	if self.indent_level(ts) >= other.indent_level(ts):
				199	return 0
				200	return 1
				201
				202	# return a list of tuples (ts, i1, i2) such that
				203	# i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
				204	# Intended to be used after not self.less(other) is known, in which
				205	# case it will return at least one witnessing tab size.
				206	def not_less_witness(self, other):
				207	n = max(self.longest_run_of_spaces(),
				208	other.longest_run_of_spaces()) + 1
				209	a = []
				210	for ts in range(1, n+1):
				211	if self.indent_level(ts) >= other.indent_level(ts):
				212	a.append( (ts,
				213	self.indent_level(ts),
				214	other.indent_level(ts)) )
				215	return a
				216
				217	def format_witnesses(w):
				218	import string
				219	firsts = map(lambda tup: str(tup[0]), w)
				220	prefix = "at tab size"
				221	if len(w) > 1:
				222	prefix = prefix + "s"
				223	return prefix + " " + string.join(firsts, ', ')
				224
# The collection of globals, the reset_globals() function, and the
# tokeneater() function, depend on which version of tokenize is
# in use.

if hasattr(tokenize, 'NL'):
    # take advantage of Guido's patch!

    # indents: stack of Whitespace objects, one per open indentation level
    # check_equal: true when the next "real" token starts a statement whose
    #              indentation must equal the top of the indents stack
    indents = []
    check_equal = 0

    def reset_globals():
        """Reset the checker state; check() calls this before each file."""
        global indents, check_equal
        check_equal = 0
        # seed the stack with the empty (column 0) indentation
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Token callback that check() hands to tokenize.tokenize().

        Tracks indentation through the module-level `indents` stack and
        raises NannyNag, with start[0] as the line number, when an indent
        is not greater than the enclosing one, or when a statement's
        indentation does not equal the top of the stack.  The keyword
        parameters only bind the token-type constants as defaults.
        """
        global indents, check_equal

        # test in decreasing order of frequency, although the check_equal
        # test must be last; INDENT and DEDENT appear equally often

        if type in (COMMENT, NL):
            # the indentation of these guys is meaningless
            pass

        elif type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack
            assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            del indents[-1]

        elif check_equal:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)

else:
    # unpatched version of tokenize

    # nesting_level: number of currently open brackets; a NEWLINE only
    #                arms check_equal while this is zero
    # indents, check_equal: same roles as in the patched branch
    nesting_level = 0
    indents = []
    check_equal = 0

    def reset_globals():
        """Reset the checker state; check() calls this before each file."""
        global nesting_level, indents, check_equal
        nesting_level = check_equal = 0
        # seed the stack with the empty (column 0) indentation
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   OP=tokenize.OP):
        """Token callback that check() hands to tokenize.tokenize().

        Same checks as the patched variant, but tokenize offers no NL
        token here, so bracket nesting is counted by hand and a NEWLINE
        seen inside brackets is ignored.  Raises NannyNag, with start[0]
        as the line number, for indentation problems and for a closing
        bracket that has no matching opener.
        """
        global nesting_level, indents, check_equal

        if type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            del indents[-1]

        elif type == NEWLINE:
            if nesting_level == 0:
                check_equal = 1

        elif type == COMMENT:
            pass

        elif check_equal:
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)

        # NOTE(review): bracket counting is placed at function level, as a
        # separate statement from the chain above, so that every OP token
        # is counted -- confirm against the pristine source, whose
        # indentation is not recoverable from this copy.
        if type == OP and token in ('{', '[', '('):
            nesting_level = nesting_level + 1

        elif type == OP and token in ('}', ']', ')'):
            if nesting_level == 0:
                raise NannyNag(start[0],
                               "unbalanced bracket '" + token + "'",
                               line)
            nesting_level = nesting_level - 1
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	350
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()
				353