Blame - Lib/tabnanny.py - platform/external/python/cpython3

blob: 8d3eab56d03fd199a75abb66e0064bd9022afb01 [file] [log] [blame]

Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
Guido van Rossum	f4b44fa	1998-04-06 14:41:20 +0000	[diff] [blame]	3	"""The Tab Nanny despises ambiguous indentation. She knows no mercy."""
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	4
Guido van Rossum	aa2a7a4	1998-06-09 19:02:21 +0000	[diff] [blame]	5	# Released to the public domain, by Tim Peters, 15 April 1998.
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	6
Guido van Rossum	dc68833	2000-02-23 15:32:19 +0000	[diff] [blame^]	7	# XXX Note: this is now a standard library module.
				8	# XXX The API needs to undergo changes however; the current code is too
				9	# XXX script-like. This will be addressed later.
				10
Guido van Rossum	a74c556	1999-07-30 17:48:20 +0000	[diff] [blame]	11	__version__ = "6"
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	12
				13	import os
				14	import sys
Guido van Rossum	a74c556	1999-07-30 17:48:20 +0000	[diff] [blame]	15	import string
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	16	import getopt
				17	import tokenize
				18
# Verbosity counter: main() adds one for every -v option; check() prints
# progress and detailed diagnostics when it is nonzero.
verbose = 0
# Quiet counter: main() adds one for every -q option; when it is nonzero
# (and verbose is zero) check() prints only the name of an offending file.
filename_only = 0
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	21
def errprint(*args):
    """Write the arguments to sys.stderr on a single line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.  Called with no
    arguments, only the newline is written.
    """
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces))
    sys.stderr.write("\n")
				28
def main():
    """Command-line entry point: parse the options, then check each argument.

    Options (each may be given more than once):
      -q  add one to the module-level ``filename_only`` counter
      -v  add one to the module-level ``verbose`` counter

    Every remaining argument is handed to check().  If option parsing fails,
    or no file/directory argument is left, a message is written through
    errprint() and main() returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        # -q is accepted by the getopt() call above, so advertise it too.
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
				46
				47	class NannyNag:
				48	def __init__(self, lineno, msg, line):
				49	self.lineno, self.msg, self.line = lineno, msg, line
				50	def get_lineno(self):
				51	return self.lineno
				52	def get_msg(self):
				53	return self.msg
				54	def get_line(self):
				55	return self.line
				56
				57	def check(file):
				58	if os.path.isdir(file) and not os.path.islink(file):
				59	if verbose:
				60	print "%s: listing directory" % `file`
				61	names = os.listdir(file)
				62	for name in names:
				63	fullname = os.path.join(file, name)
				64	if (os.path.isdir(fullname) and
				65	not os.path.islink(fullname) or
				66	os.path.normcase(name[-3:]) == ".py"):
				67	check(fullname)
				68	return
				69
				70	try:
				71	f = open(file)
				72	except IOError, msg:
Guido van Rossum	f9a6d7d	1998-09-14 16:22:21 +0000	[diff] [blame]	73	errprint("%s: I/O Error: %s" % (`file`, str(msg)))
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	74	return
				75
				76	if verbose > 1:
				77	print "checking", `file`, "..."
				78
				79	reset_globals()
				80	try:
				81	tokenize.tokenize(f.readline, tokeneater)
				82
				83	except tokenize.TokenError, msg:
Guido van Rossum	f9a6d7d	1998-09-14 16:22:21 +0000	[diff] [blame]	84	errprint("%s: Token Error: %s" % (`file`, str(msg)))
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	85	return
				86
				87	except NannyNag, nag:
				88	badline = nag.get_lineno()
				89	line = nag.get_line()
				90	if verbose:
				91	print "%s: * Line %d: trouble in tab city! *" % (
				92	`file`, badline)
				93	print "offending line:", `line`
				94	print nag.get_msg()
				95	else:
Guido van Rossum	a74c556	1999-07-30 17:48:20 +0000	[diff] [blame]	96	if ' ' in file: file = '"' + file + '"'
Andrew M. Kuchling	dc86a4e	1998-12-18 13:56:58 +0000	[diff] [blame]	97	if filename_only: print file
				98	else: print file, badline, `line`
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	99	return
				100
				101	if verbose:
				102	print "%s: Clean bill of health." % `file`
				103
				104	class Whitespace:
				105	# the characters used for space and tab
				106	S, T = ' \t'
				107
				108	# members:
				109	# raw
				110	# the original string
				111	# n
				112	# the number of leading whitespace characters in raw
				113	# nt
				114	# the number of tabs in raw[:n]
				115	# norm
				116	# the normal form as a pair (count, trailing), where:
				117	# count
				118	# a tuple such that raw[:n] contains count[i]
				119	# instances of S * i + T
				120	# trailing
				121	# the number of trailing spaces in raw[:n]
				122	# It's A Theorem that m.indent_level(t) ==
				123	# n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
				124	# is_simple
				125	# true iff raw[:n] is of the form (T)(S)
				126
				127	def __init__(self, ws):
				128	self.raw = ws
				129	S, T = Whitespace.S, Whitespace.T
				130	count = []
				131	b = n = nt = 0
				132	for ch in self.raw:
				133	if ch == S:
				134	n = n + 1
				135	b = b + 1
				136	elif ch == T:
				137	n = n + 1
				138	nt = nt + 1
				139	if b >= len(count):
				140	count = count + [0] * (b - len(count) + 1)
				141	count[b] = count[b] + 1
				142	b = 0
				143	else:
				144	break
				145	self.n = n
				146	self.nt = nt
				147	self.norm = tuple(count), b
				148	self.is_simple = len(count) <= 1
				149
				150	# return length of longest contiguous run of spaces (whether or not
				151	# preceding a tab)
				152	def longest_run_of_spaces(self):
				153	count, trailing = self.norm
				154	return max(len(count)-1, trailing)
				155
				156	def indent_level(self, tabsize):
				157	# count, il = self.norm
				158	# for i in range(len(count)):
				159	# if count[i]:
				160	# il = il + (i/tabsize + 1)tabsize count[i]
				161	# return il
				162
				163	# quicker:
				164	# il = trailing + sum (i/ts + 1)tscount[i] =
				165	# trailing + ts * sum (i/ts + 1)*count[i] =
				166	# trailing + ts * sum i/ts*count[i] + count[i] =
				167	# trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
				168	# trailing + ts * [(sum i/ts*count[i]) + num_tabs]
				169	# and note that i/ts*count[i] is 0 when i < ts
				170
				171	count, trailing = self.norm
				172	il = 0
				173	for i in range(tabsize, len(count)):
				174	il = il + i/tabsize * count[i]
				175	return trailing + tabsize * (il + self.nt)
				176
				177	# return true iff self.indent_level(t) == other.indent_level(t)
				178	# for all t >= 1
				179	def equal(self, other):
				180	return self.norm == other.norm
				181
				182	# return a list of tuples (ts, i1, i2) such that
				183	# i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
				184	# Intended to be used after not self.equal(other) is known, in which
				185	# case it will return at least one witnessing tab size.
				186	def not_equal_witness(self, other):
				187	n = max(self.longest_run_of_spaces(),
				188	other.longest_run_of_spaces()) + 1
				189	a = []
				190	for ts in range(1, n+1):
				191	if self.indent_level(ts) != other.indent_level(ts):
				192	a.append( (ts,
				193	self.indent_level(ts),
				194	other.indent_level(ts)) )
				195	return a
				196
				197	# Return true iff self.indent_level(t) < other.indent_level(t)
				198	# for all t >= 1.
				199	# The algorithm is due to Vincent Broman.
				200	# Easy to prove it's correct.
				201	# XXXpost that.
				202	# Trivial to prove n is sharp (consider T vs ST).
				203	# Unknown whether there's a faster general way. I suspected so at
				204	# first, but no longer.
				205	# For the special (but common!) case where M and N are both of the
				206	# form (T)(S), M.less(N) iff M.len() < N.len() and
				207	# M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
				208	# XXXwrite that up.
				209	# Note that M is of the form (T)(S) iff len(M.norm[0]) <= 1.
				210	def less(self, other):
				211	if self.n >= other.n:
				212	return 0
				213	if self.is_simple and other.is_simple:
				214	return self.nt <= other.nt
				215	n = max(self.longest_run_of_spaces(),
				216	other.longest_run_of_spaces()) + 1
				217	# the self.n >= other.n test already did it for ts=1
				218	for ts in range(2, n+1):
				219	if self.indent_level(ts) >= other.indent_level(ts):
				220	return 0
				221	return 1
				222
				223	# return a list of tuples (ts, i1, i2) such that
				224	# i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
				225	# Intended to be used after not self.less(other) is known, in which
				226	# case it will return at least one witnessing tab size.
				227	def not_less_witness(self, other):
				228	n = max(self.longest_run_of_spaces(),
				229	other.longest_run_of_spaces()) + 1
				230	a = []
				231	for ts in range(1, n+1):
				232	if self.indent_level(ts) >= other.indent_level(ts):
				233	a.append( (ts,
				234	self.indent_level(ts),
				235	other.indent_level(ts)) )
				236	return a
				237
				238	def format_witnesses(w):
				239	import string
				240	firsts = map(lambda tup: str(tup[0]), w)
				241	prefix = "at tab size"
				242	if len(w) > 1:
				243	prefix = prefix + "s"
				244	return prefix + " " + string.join(firsts, ', ')
				245
Guido van Rossum	f4b44fa	1998-04-06 14:41:20 +0000	[diff] [blame]	246	# The collection of globals, the reset_globals() function, and the
				247	# tokeneater() function, depend on which version of tokenize is
				248	# in use.
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	249
if hasattr(tokenize, 'NL'):
    # take advantage of Guido's patch!

    # indents: stack of Whitespace objects, one per open indentation level;
    #          reset_globals() seeds it with Whitespace("").
    # check_equal: true when the next non-junk token starts a statement
    #          whose leading whitespace must equal indents[-1].
    indents = []
    check_equal = 0

    def reset_globals():
        """Reset the module-level tokeneater state before a new file."""
        global indents, check_equal
        check_equal = 0
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   JUNK=(tokenize.COMMENT, tokenize.NL) ):
        """Token callback that check() passes to tokenize.tokenize().

        type, token, start, end and line describe one token; start[0] is
        used as the line number and line as the text of the source line.
        The remaining parameters are default-bound tokenize constants.

        Raises NannyNag when an INDENT is not greater than the enclosing
        indentation, or when a statement's leading whitespace does not
        equal the indentation on top of the ``indents`` stack.
        """
        global indents, check_equal

        if type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)

else:
    # unpatched version of tokenize

    # nesting_level: number of currently open '(', '[' and '{' brackets.
    # indents / check_equal: same roles as in the branch above.
    nesting_level = 0
    indents = []
    check_equal = 0

    def reset_globals():
        """Reset the module-level tokeneater state before a new file."""
        global nesting_level, indents, check_equal
        nesting_level = check_equal = 0
        indents = [Whitespace("")]

    def tokeneater(type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   OP=tokenize.OP):
        """Token callback that check() passes to tokenize.tokenize().

        Same contract as the variant used when tokenize has NL, except that
        bracket nesting is tracked here by hand.  Raises NannyNag for an
        INDENT that is not greater than the enclosing indentation, for a
        statement whose leading whitespace does not equal indents[-1], and
        for a closing bracket seen while no bracket is open.
        """
        global nesting_level, indents, check_equal

        if type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            del indents[-1]

        elif type == NEWLINE:
            # only a NEWLINE outside all brackets arms the equality check
            # (presumably this tokenize also reports NEWLINE for line
            # breaks inside brackets -- TODO confirm)
            if nesting_level == 0:
                check_equal = 1

        elif type == COMMENT:
            pass

        elif check_equal:
            # first non-comment token after an armed NEWLINE: its line must
            # carry exactly the indentation on top of the stack
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)

        # bracket tracking is a separate test from the chain above, so an
        # OP token that starts a statement is both checked and counted
        if type == OP and token in ('{', '[', '('):
            nesting_level = nesting_level + 1

        elif type == OP and token in ('}', ']', ')'):
            if nesting_level == 0:
                raise NannyNag(start[0],
                               "unbalanced bracket '" + token + "'",
                               line)
            nesting_level = nesting_level - 1
Guido van Rossum	9ab75cb	1998-03-31 14:31:39 +0000	[diff] [blame]	369
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
				372