#!/usr/bin/env python
# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

r"""
Check Python source code formatting, according to PEP 8:
http://www.python.org/dev/peps/pep-0008/

For usage and a list of options, try this:
$ python pep8.py -h

This program and its regression test suite live here:
http://github.com/jcrocholl/pep8

Groups of errors and warnings:
E errors
W warnings
100 indentation
200 whitespace
300 blank lines
400 imports
500 line length
600 deprecation
700 statements
900 syntax error
"""
__version__ = '1.4.5a0'
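
# Typical command line invocations (the file and directory names below are
# illustrative only):
#
#     $ python pep8.py --first example.py
#     $ python pep8.py --show-source --show-pep8 example.py
#     $ python pep8.py --statistics -qq src/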
49
50import os
51import sys
52import re
53import time
54import inspect
55import keyword
56import tokenize
57from optparse import OptionParser
58from fnmatch import fnmatch
59try:
60 from configparser import RawConfigParser
61 from io import TextIOWrapper
62except ImportError:
63 from ConfigParser import RawConfigParser
64
65DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
66DEFAULT_IGNORE = 'E226,E24'
67if sys.platform == 'win32':
68 DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
69else:
70 DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
71 os.path.expanduser('~/.config'), 'pep8')
72PROJECT_CONFIG = ('.pep8', 'tox.ini', 'setup.cfg')
73TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
74MAX_LINE_LENGTH = 79
75REPORT_FORMAT = {
76 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
77 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
78}
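
# With the 'default' format above, a reported problem renders roughly as
# (values are illustrative):
#
#     example.py:42:10: E401 multiple imports on one line
#
# The --format option accepts 'default', 'pylint' or a custom string built
# from the same %(path)s, %(row)d, %(col)d, %(code)s and %(text)s keys.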
79
80PyCF_ONLY_AST = 1024
81SINGLETONS = frozenset(['False', 'None', 'True'])
82KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
83UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
84ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
85WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
86WS_NEEDED_OPERATORS = frozenset([
87 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
88 '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
89WHITESPACE = frozenset(' \t')
90SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
91 tokenize.INDENT, tokenize.DEDENT])
92BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
93
94INDENT_REGEX = re.compile(r'([ \t]*)')
95RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
96RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
97ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
98DOCSTRING_REGEX = re.compile(r'u?r?["\']')
99EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
100WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
101COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
102COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
103 r'|\s*\(\s*([^)]*[^ )])\s*\))')
104KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
105OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
106LAMBDA_REGEX = re.compile(r'\blambda\b')
107HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
108
109# Work around Python < 2.6 behaviour, which does not generate NL after
110# a comment which is on a line by itself.
111COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
112
113
114##############################################################################
115# Plugins (check functions) for physical lines
116##############################################################################
117
118
119def tabs_or_spaces(physical_line, indent_char):
120 r"""
121 Never mix tabs and spaces.
122
123 The most popular way of indenting Python is with spaces only. The
124 second-most popular way is with tabs only. Code indented with a mixture
125 of tabs and spaces should be converted to using spaces exclusively. When
126 invoking the Python command line interpreter with the -t option, it issues
127 warnings about code that illegally mixes tabs and spaces. When using -tt
128 these warnings become errors. These options are highly recommended!
129
130 Okay: if a == 0:\n a = 1\n b = 1
131 E101: if a == 0:\n a = 1\n\tb = 1
132 """
133 indent = INDENT_REGEX.match(physical_line).group(1)
134 for offset, char in enumerate(indent):
135 if char != indent_char:
136 return offset, "E101 indentation contains mixed spaces and tabs"
137
138
139def tabs_obsolete(physical_line):
140 r"""
141 For new projects, spaces-only are strongly recommended over tabs. Most
142 editors have features that make this easy to do.
143
144 Okay: if True:\n return
145 W191: if True:\n\treturn
146 """
147 indent = INDENT_REGEX.match(physical_line).group(1)
148 if '\t' in indent:
149 return indent.index('\t'), "W191 indentation contains tabs"
150
151
152def trailing_whitespace(physical_line):
153 r"""
154 JCR: Trailing whitespace is superfluous.
155 FBM: Except when it occurs as part of a blank line (i.e. the line is
156 nothing but whitespace). According to Python docs[1] a line with only
157 whitespace is considered a blank line, and is to be ignored. However,
158 matching a blank line to its indentation level avoids mistakenly
159 terminating a multi-line statement (e.g. class declaration) when
160 pasting code into the standard Python interpreter.
161
162 [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines
163
    The warning returned varies depending on whether the line itself is
    blank, for easier filtering for those who want to indent their blank
    lines.
166
167 Okay: spam(1)\n#
168 W291: spam(1) \n#
169 W293: class Foo(object):\n \n bang = 12
170 """
171 physical_line = physical_line.rstrip('\n') # chr(10), newline
172 physical_line = physical_line.rstrip('\r') # chr(13), carriage return
173 physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
174 stripped = physical_line.rstrip(' \t\v')
175 if physical_line != stripped:
176 if stripped:
177 return len(stripped), "W291 trailing whitespace"
178 else:
179 return 0, "W293 blank line contains whitespace"
180
181
182def trailing_blank_lines(physical_line, lines, line_number):
183 r"""
184 JCR: Trailing blank lines are superfluous.
185
186 Okay: spam(1)
187 W391: spam(1)\n
188 """
189 if not physical_line.rstrip() and line_number == len(lines):
190 return 0, "W391 blank line at end of file"
191
192
193def missing_newline(physical_line):
194 """
195 JCR: The last line should have a newline.
196
197 Reports warning W292.
198 """
199 if physical_line.rstrip() == physical_line:
200 return len(physical_line), "W292 no newline at end of file"
201
202
203def maximum_line_length(physical_line, max_line_length):
204 """
205 Limit all lines to a maximum of 79 characters.
206
207 There are still many devices around that are limited to 80 character
208 lines; plus, limiting windows to 80 characters makes it possible to have
209 several windows side-by-side. The default wrapping on such devices looks
210 ugly. Therefore, please limit all lines to a maximum of 79 characters.
211 For flowing long blocks of text (docstrings or comments), limiting the
212 length to 72 characters is recommended.
213
214 Reports error E501.
215 """
216 line = physical_line.rstrip()
217 length = len(line)
218 if length > max_line_length:
219 if noqa(line):
220 return
221 if hasattr(line, 'decode'): # Python 2
222 # The line could contain multi-byte characters
223 try:
224 length = len(line.decode('utf-8'))
225 except UnicodeError:
226 pass
227 if length > max_line_length:
228 return (max_line_length, "E501 line too long "
229 "(%d > %d characters)" % (length, max_line_length))
230
231
232##############################################################################
233# Plugins (check functions) for logical lines
234##############################################################################
235
236
237def blank_lines(logical_line, blank_lines, indent_level, line_number,
238 previous_logical, previous_indent_level):
239 r"""
240 Separate top-level function and class definitions with two blank lines.
241
242 Method definitions inside a class are separated by a single blank line.
243
244 Extra blank lines may be used (sparingly) to separate groups of related
245 functions. Blank lines may be omitted between a bunch of related
246 one-liners (e.g. a set of dummy implementations).
247
248 Use blank lines in functions, sparingly, to indicate logical sections.
249
250 Okay: def a():\n pass\n\n\ndef b():\n pass
251 Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass
252
253 E301: class Foo:\n b = 0\n def bar():\n pass
254 E302: def a():\n pass\n\ndef b(n):\n pass
255 E303: def a():\n pass\n\n\n\ndef b(n):\n pass
256 E303: def a():\n\n\n\n pass
257 E304: @decorator\n\ndef a():\n pass
258 """
259 if line_number < 3 and not previous_logical:
260 return # Don't expect blank lines before the first line
261 if previous_logical.startswith('@'):
262 if blank_lines:
263 yield 0, "E304 blank lines found after function decorator"
264 elif blank_lines > 2 or (indent_level and blank_lines == 2):
265 yield 0, "E303 too many blank lines (%d)" % blank_lines
266 elif logical_line.startswith(('def ', 'class ', '@')):
267 if indent_level:
268 if not (blank_lines or previous_indent_level < indent_level or
269 DOCSTRING_REGEX.match(previous_logical)):
270 yield 0, "E301 expected 1 blank line, found 0"
271 elif blank_lines != 2:
272 yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
273
274
275def extraneous_whitespace(logical_line):
276 """
277 Avoid extraneous whitespace in the following situations:
278
279 - Immediately inside parentheses, brackets or braces.
280
281 - Immediately before a comma, semicolon, or colon.
282
283 Okay: spam(ham[1], {eggs: 2})
284 E201: spam( ham[1], {eggs: 2})
285 E201: spam(ham[ 1], {eggs: 2})
286 E201: spam(ham[1], { eggs: 2})
287 E202: spam(ham[1], {eggs: 2} )
288 E202: spam(ham[1 ], {eggs: 2})
289 E202: spam(ham[1], {eggs: 2 })
290
291 E203: if x == 4: print x, y; x, y = y , x
292 E203: if x == 4: print x, y ; x, y = y, x
293 E203: if x == 4 : print x, y; x, y = y, x
294 """
295 line = logical_line
296 for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
297 text = match.group()
298 char = text.strip()
299 found = match.start()
300 if text == char + ' ':
301 # assert char in '([{'
302 yield found + 1, "E201 whitespace after '%s'" % char
303 elif line[found - 1] != ',':
304 code = ('E202' if char in '}])' else 'E203') # if char in ',;:'
305 yield found, "%s whitespace before '%s'" % (code, char)
306
307
308def whitespace_around_keywords(logical_line):
309 r"""
310 Avoid extraneous whitespace around keywords.
311
    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
317 """
318 for match in KEYWORD_REGEX.finditer(logical_line):
319 before, after = match.groups()
320
321 if '\t' in before:
322 yield match.start(1), "E274 tab before keyword"
323 elif len(before) > 1:
324 yield match.start(1), "E272 multiple spaces before keyword"
325
326 if '\t' in after:
327 yield match.start(2), "E273 tab after keyword"
328 elif len(after) > 1:
329 yield match.start(2), "E271 multiple spaces after keyword"
330
331
332def missing_whitespace(logical_line):
333 """
334 JCR: Each comma, semicolon or colon should be followed by whitespace.
335
336 Okay: [a, b]
337 Okay: (3,)
338 Okay: a[1:4]
339 Okay: a[:4]
340 Okay: a[1:]
341 Okay: a[1:4:2]
342 E231: ['a','b']
343 E231: foo(bar,baz)
344 E231: [{'a':'b'}]
345 """
346 line = logical_line
347 for index in range(len(line) - 1):
348 char = line[index]
349 if char in ',;:' and line[index + 1] not in WHITESPACE:
350 before = line[:index]
351 if char == ':' and before.count('[') > before.count(']') and \
352 before.rfind('{') < before.rfind('['):
353 continue # Slice syntax, no space required
354 if char == ',' and line[index + 1] == ')':
355 continue # Allow tuple with only one element: (3,)
356 yield index, "E231 missing whitespace after '%s'" % char
357
358
359def indentation(logical_line, previous_logical, indent_char,
360 indent_level, previous_indent_level):
361 r"""
362 Use 4 spaces per indentation level.
363
364 For really old code that you don't want to mess up, you can continue to
365 use 8-space tabs.
366
    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
376 """
377 if indent_char == ' ' and indent_level % 4:
378 yield 0, "E111 indentation is not a multiple of four"
379 indent_expect = previous_logical.endswith(':')
380 if indent_expect and indent_level <= previous_indent_level:
381 yield 0, "E112 expected an indented block"
382 if indent_level > previous_indent_level and not indent_expect:
383 yield 0, "E113 unexpected indentation"
384
385
386def continuation_line_indentation(logical_line, tokens, indent_level, verbose):
387 r"""
388 Continuation lines should align wrapped elements either vertically using
389 Python's implicit line joining inside parentheses, brackets and braces, or
390 using a hanging indent.
391
392 When using a hanging indent the following considerations should be applied:
393
394 - there should be no arguments on the first line, and
395
396 - further indentation should be used to clearly distinguish itself as a
397 continuation line.
398
    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (a or\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n       42)
    E128: a = (24,\n    42)
411 """
412 first_row = tokens[0][2][0]
413 nrows = 1 + tokens[-1][2][0] - first_row
414 if nrows == 1 or noqa(tokens[0][4]):
415 return
416
417 # indent_next tells us whether the next block is indented; assuming
418 # that it is indented by 4 spaces, then we should not allow 4-space
419 # indents on the final continuation line; in turn, some other
420 # indents are allowed to have an extra 4 spaces.
421 indent_next = logical_line.endswith(':')
422
423 row = depth = 0
424 # remember how many brackets were opened on each line
425 parens = [0] * nrows
426 # relative indents of physical lines
427 rel_indent = [0] * nrows
428 # visual indents
429 indent_chances = {}
430 last_indent = tokens[0][2]
431 indent = [last_indent[1]]
432 if verbose >= 3:
433 print(">>> " + tokens[0][4].rstrip())
434
435 for token_type, text, start, end, line in tokens:
436
437 newline = row < start[0] - first_row
438 if newline:
439 row = start[0] - first_row
440 newline = (not last_token_multiline and
441 token_type not in (tokenize.NL, tokenize.NEWLINE))
442
443 if newline:
444 # this is the beginning of a continuation line.
445 last_indent = start
446 if verbose >= 3:
447 print("... " + line.rstrip())
448
449 # record the initial indent.
450 rel_indent[row] = expand_indent(line) - indent_level
451
452 if depth:
453 # a bracket expression in a continuation line.
454 # find the line that it was opened on
455 for open_row in range(row - 1, -1, -1):
456 if parens[open_row]:
457 break
458 else:
459 # an unbracketed continuation line (ie, backslash)
460 open_row = 0
461 hang = rel_indent[row] - rel_indent[open_row]
462 visual_indent = indent_chances.get(start[1])
463
464 if token_type == tokenize.OP and text in ']})':
465 # this line starts with a closing bracket
466 if indent[depth]:
467 if start[1] != indent[depth]:
468 yield (start, "E124 closing bracket does not match "
469 "visual indentation")
470 elif hang:
471 yield (start, "E123 closing bracket does not match "
472 "indentation of opening bracket's line")
473 elif visual_indent is True:
474 # visual indent is verified
475 if not indent[depth]:
476 indent[depth] = start[1]
477 elif visual_indent in (text, str):
478 # ignore token lined up with matching one from a previous line
479 pass
480 elif indent[depth] and start[1] < indent[depth]:
481 # visual indent is broken
482 yield (start, "E128 continuation line "
483 "under-indented for visual indent")
484 elif hang == 4 or (indent_next and rel_indent[row] == 8):
485 # hanging indent is verified
486 pass
487 else:
488 # indent is broken
489 if hang <= 0:
490 error = "E122", "missing indentation or outdented"
491 elif indent[depth]:
492 error = "E127", "over-indented for visual indent"
493 elif hang % 4:
494 error = "E121", "indentation is not a multiple of four"
495 else:
496 error = "E126", "over-indented for hanging indent"
497 yield start, "%s continuation line %s" % error
498
499 # look for visual indenting
500 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
501 and not indent[depth]):
502 indent[depth] = start[1]
503 indent_chances[start[1]] = True
504 if verbose >= 4:
505 print("bracket depth %s indent to %s" % (depth, start[1]))
506 # deal with implicit string concatenation
507 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
508 text in ('u', 'ur', 'b', 'br')):
509 indent_chances[start[1]] = str
510 # special case for the "if" statement because len("if (") == 4
511 elif not indent_chances and not row and not depth and text == 'if':
512 indent_chances[end[1] + 1] = True
513
514 # keep track of bracket depth
515 if token_type == tokenize.OP:
516 if text in '([{':
517 depth += 1
518 indent.append(0)
519 parens[row] += 1
520 if verbose >= 4:
521 print("bracket depth %s seen, col %s, visual min = %s" %
522 (depth, start[1], indent[depth]))
523 elif text in ')]}' and depth > 0:
524 # parent indents should not be more than this one
525 prev_indent = indent.pop() or last_indent[1]
526 for d in range(depth):
527 if indent[d] > prev_indent:
528 indent[d] = 0
529 for ind in list(indent_chances):
530 if ind >= prev_indent:
531 del indent_chances[ind]
532 depth -= 1
533 if depth:
534 indent_chances[indent[depth]] = True
535 for idx in range(row, -1, -1):
536 if parens[idx]:
537 parens[idx] -= 1
538 break
539 assert len(indent) == depth + 1
540 if start[1] not in indent_chances:
541 # allow to line up tokens
542 indent_chances[start[1]] = text
543
544 last_token_multiline = (start[0] != end[0])
545
546 if indent_next and rel_indent[-1] == 4:
547 yield (last_indent, "E125 continuation line does not distinguish "
548 "itself from next logical line")
549
550
551def whitespace_before_parameters(logical_line, tokens):
552 """
553 Avoid extraneous whitespace in the following situations:
554
555 - Immediately before the open parenthesis that starts the argument
556 list of a function call.
557
558 - Immediately before the open parenthesis that starts an indexing or
559 slicing.
560
561 Okay: spam(1)
562 E211: spam (1)
563
564 Okay: dict['key'] = list[index]
565 E211: dict ['key'] = list[index]
566 E211: dict['key'] = list [index]
567 """
568 prev_type = tokens[0][0]
569 prev_text = tokens[0][1]
570 prev_end = tokens[0][3]
571 for index in range(1, len(tokens)):
572 token_type, text, start, end, line = tokens[index]
573 if (token_type == tokenize.OP and
574 text in '([' and
575 start != prev_end and
576 (prev_type == tokenize.NAME or prev_text in '}])') and
577 # Syntax "class A (B):" is allowed, but avoid it
578 (index < 2 or tokens[index - 2][1] != 'class') and
579 # Allow "return (a.foo for a in range(5))"
580 not keyword.iskeyword(prev_text)):
581 yield prev_end, "E211 whitespace before '%s'" % text
582 prev_type = token_type
583 prev_text = text
584 prev_end = end
585
586
587def whitespace_around_operator(logical_line):
588 r"""
589 Avoid extraneous whitespace in the following situations:
590
591 - More than one space around an assignment (or other) operator to
592 align it with another.
593
    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
599 """
600 for match in OPERATOR_REGEX.finditer(logical_line):
601 before, after = match.groups()
602
603 if '\t' in before:
604 yield match.start(1), "E223 tab before operator"
605 elif len(before) > 1:
606 yield match.start(1), "E221 multiple spaces before operator"
607
608 if '\t' in after:
609 yield match.start(2), "E224 tab after operator"
610 elif len(after) > 1:
611 yield match.start(2), "E222 multiple spaces after operator"
612
613
614def missing_whitespace_around_operator(logical_line, tokens):
615 r"""
616 - Always surround these binary operators with a single space on
617 either side: assignment (=), augmented assignment (+=, -= etc.),
618 comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
619 Booleans (and, or, not).
620
621 - Use spaces around arithmetic operators.
622
623 Okay: i = i + 1
624 Okay: submitted += 1
625 Okay: x = x * 2 - 1
626 Okay: hypot2 = x * x + y * y
627 Okay: c = (a + b) * (a - b)
628 Okay: foo(bar, key='word', *args, **kwargs)
629 Okay: alpha[:-i]
630
631 E225: i=i+1
632 E225: submitted +=1
633 E225: x = x /2 - 1
634 E225: z = x **y
635 E226: c = (a+b) * (a-b)
636 E226: hypot2 = x*x + y*y
637 E227: c = a|b
638 E228: msg = fmt%(errno, errmsg)
639 """
640 parens = 0
641 need_space = False
642 prev_type = tokenize.OP
643 prev_text = prev_end = None
644 for token_type, text, start, end, line in tokens:
645 if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
646 # ERRORTOKEN is triggered by backticks in Python 3
647 continue
648 if text in ('(', 'lambda'):
649 parens += 1
650 elif text == ')':
651 parens -= 1
652 if need_space:
653 if start != prev_end:
654 # Found a (probably) needed space
655 if need_space is not True and not need_space[1]:
656 yield (need_space[0],
657 "E225 missing whitespace around operator")
658 need_space = False
659 elif text == '>' and prev_text in ('<', '-'):
660 # Tolerate the "<>" operator, even if running Python 3
661 # Deal with Python 3's annotated return value "->"
662 pass
663 else:
664 if need_space is True or need_space[1]:
665 # A needed trailing space was not found
666 yield prev_end, "E225 missing whitespace around operator"
667 else:
668 code, optype = 'E226', 'arithmetic'
669 if prev_text == '%':
670 code, optype = 'E228', 'modulo'
671 elif prev_text not in ARITHMETIC_OP:
672 code, optype = 'E227', 'bitwise or shift'
673 yield (need_space[0], "%s missing whitespace "
674 "around %s operator" % (code, optype))
675 need_space = False
676 elif token_type == tokenize.OP and prev_end is not None:
677 if text == '=' and parens:
678 # Allow keyword args or defaults: foo(bar=None).
679 pass
680 elif text in WS_NEEDED_OPERATORS:
681 need_space = True
682 elif text in UNARY_OPERATORS:
683 # Check if the operator is being used as a binary operator
684 # Allow unary operators: -123, -x, +1.
685 # Allow argument unpacking: foo(*args, **kwargs).
686 if prev_type == tokenize.OP:
687 binary_usage = (prev_text in '}])')
688 elif prev_type == tokenize.NAME:
689 binary_usage = (prev_text not in KEYWORDS)
690 else:
691 binary_usage = (prev_type not in SKIP_TOKENS)
692
693 if binary_usage:
694 if text in WS_OPTIONAL_OPERATORS:
695 need_space = None
696 else:
697 need_space = True
698 elif text in WS_OPTIONAL_OPERATORS:
699 need_space = None
700
701 if need_space is None:
702 # Surrounding space is optional, but ensure that
703 # trailing space matches opening space
704 need_space = (prev_end, start != prev_end)
705 elif need_space and start == prev_end:
706 # A needed opening space was not found
707 yield prev_end, "E225 missing whitespace around operator"
708 need_space = False
709 prev_type = token_type
710 prev_text = text
711 prev_end = end
712
713
714def whitespace_around_comma(logical_line):
715 r"""
716 Avoid extraneous whitespace in the following situations:
717
    - More than one space (or a tab) after a comma, semicolon or colon,
      e.g. when trying to align values in columns.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
726 """
727 line = logical_line
728 for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
729 found = m.start() + 1
730 if '\t' in m.group():
731 yield found, "E242 tab after '%s'" % m.group()[0]
732 else:
733 yield found, "E241 multiple spaces after '%s'" % m.group()[0]
734
735
736def whitespace_around_named_parameter_equals(logical_line, tokens):
737 """
738 Don't use spaces around the '=' sign when used to indicate a
739 keyword argument or a default parameter value.
740
741 Okay: def complex(real, imag=0.0):
742 Okay: return magic(r=real, i=imag)
743 Okay: boolean(a == b)
744 Okay: boolean(a != b)
745 Okay: boolean(a <= b)
746 Okay: boolean(a >= b)
747
748 E251: def complex(real, imag = 0.0):
749 E251: return magic(r = real, i = imag)
750 """
751 parens = 0
752 no_space = False
753 prev_end = None
754 message = "E251 unexpected spaces around keyword / parameter equals"
755 for token_type, text, start, end, line in tokens:
756 if no_space:
757 no_space = False
758 if start != prev_end:
759 yield (prev_end, message)
760 elif token_type == tokenize.OP:
761 if text == '(':
762 parens += 1
763 elif text == ')':
764 parens -= 1
765 elif parens and text == '=':
766 no_space = True
767 if start != prev_end:
768 yield (prev_end, message)
769 prev_end = end
770
771
772def whitespace_before_inline_comment(logical_line, tokens):
773 """
774 Separate inline comments by at least two spaces.
775
776 An inline comment is a comment on the same line as a statement. Inline
777 comments should be separated by at least two spaces from the statement.
778 They should start with a # and a single space.
779
    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
785 """
786 prev_end = (0, 0)
787 for token_type, text, start, end, line in tokens:
788 if token_type == tokenize.COMMENT:
789 if not line[:start[1]].strip():
790 continue
791 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
792 yield (prev_end,
793 "E261 at least two spaces before inline comment")
794 symbol, sp, comment = text.partition(' ')
795 if symbol not in ('#', '#:') or comment[:1].isspace():
796 yield start, "E262 inline comment should start with '# '"
797 elif token_type != tokenize.NL:
798 prev_end = end
799
800
801def imports_on_separate_lines(logical_line):
802 r"""
803 Imports should usually be on separate lines.
804
805 Okay: import os\nimport sys
806 E401: import sys, os
807
808 Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
810 Okay: from foo.bar.yourclass import YourClass
811 Okay: import myclass
812 Okay: import foo.bar.yourclass
813 """
814 line = logical_line
815 if line.startswith('import '):
816 found = line.find(',')
817 if -1 < found and ';' not in line[:found]:
818 yield found, "E401 multiple imports on one line"
819
820
821def compound_statements(logical_line):
822 r"""
823 Compound statements (multiple statements on the same line) are
824 generally discouraged.
825
826 While sometimes it's okay to put an if/for/while with a small body
827 on the same line, never do this for multi-clause statements. Also
828 avoid folding such long lines!
829
830 Okay: if foo == 'blah':\n do_blah_thing()
831 Okay: do_one()
832 Okay: do_two()
833 Okay: do_three()
834
835 E701: if foo == 'blah': do_blah_thing()
836 E701: for x in lst: total += x
837 E701: while t < 10: t = delay()
838 E701: if foo == 'blah': do_blah_thing()
839 E701: else: do_non_blah_thing()
840 E701: try: something()
841 E701: finally: cleanup()
842 E701: if foo == 'blah': one(); two(); three()
843
844 E702: do_one(); do_two(); do_three()
845 E703: do_four(); # useless semicolon
846 """
847 line = logical_line
848 last_char = len(line) - 1
849 found = line.find(':')
850 if -1 < found < last_char:
851 before = line[:found]
852 if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
853 before.count('[') <= before.count(']') and # [1:2] (slice)
854 before.count('(') <= before.count(')') and # (Python 3 annotation)
855 not LAMBDA_REGEX.search(before)): # lambda x: x
856 yield found, "E701 multiple statements on one line (colon)"
857 found = line.find(';')
858 if -1 < found:
859 if found < last_char:
860 yield found, "E702 multiple statements on one line (semicolon)"
861 else:
862 yield found, "E703 statement ends with a semicolon"
863
864
865def explicit_line_join(logical_line, tokens):
866 r"""
867 Avoid explicit line join between brackets.
868
869 The preferred way of wrapping long lines is by using Python's implied line
870 continuation inside parentheses, brackets and braces. Long lines can be
871 broken over multiple lines by wrapping expressions in parentheses. These
872 should be used in preference to using a backslash for line continuation.
873
874 E502: aaa = [123, \\n 123]
875 E502: aaa = ("bbb " \\n "ccc")
876
877 Okay: aaa = [123,\n 123]
878 Okay: aaa = ("bbb "\n "ccc")
879 Okay: aaa = "bbb " \\n "ccc"
880 """
881 prev_start = prev_end = parens = 0
882 for token_type, text, start, end, line in tokens:
883 if start[0] != prev_start and parens and backslash:
884 yield backslash, "E502 the backslash is redundant between brackets"
885 if end[0] != prev_end:
886 if line.rstrip('\r\n').endswith('\\'):
887 backslash = (end[0], len(line.splitlines()[-1]) - 1)
888 else:
889 backslash = None
890 prev_start = prev_end = end[0]
891 else:
892 prev_start = start[0]
893 if token_type == tokenize.OP:
894 if text in '([{':
895 parens += 1
896 elif text in ')]}':
897 parens -= 1
898
899
900def comparison_to_singleton(logical_line):
901 """
902 Comparisons to singletons like None should always be done
903 with "is" or "is not", never the equality operators.
904
905 Okay: if arg is not None:
906 E711: if arg != None:
907 E712: if arg == True:
908
909 Also, beware of writing if x when you really mean if x is not None --
910 e.g. when testing whether a variable or argument that defaults to None was
911 set to some other value. The other value might have a type (such as a
912 container) that could be false in a boolean context!
913 """
914 match = COMPARE_SINGLETON_REGEX.search(logical_line)
915 if match:
916 same = (match.group(1) == '==')
917 singleton = match.group(2)
918 msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
919 if singleton in ('None',):
920 code = 'E711'
921 else:
922 code = 'E712'
923 nonzero = ((singleton == 'True' and same) or
924 (singleton == 'False' and not same))
925 msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
926 yield match.start(1), ("%s comparison to %s should be %s" %
927 (code, singleton, msg))
928
929
930def comparison_type(logical_line):
931 """
932 Object type comparisons should always use isinstance() instead of
933 comparing types directly.
934
935 Okay: if isinstance(obj, int):
936 E721: if type(obj) is type(1):
937
938 When checking if an object is a string, keep in mind that it might be a
939 unicode string too! In Python 2.3, str and unicode have a common base
940 class, basestring, so you can do:
941
942 Okay: if isinstance(obj, basestring):
943 Okay: if type(a1) is type(b1):
944 """
945 match = COMPARE_TYPE_REGEX.search(logical_line)
946 if match:
947 inst = match.group(1)
948 if inst and isidentifier(inst) and inst not in SINGLETONS:
949 return # Allow comparison for types which are not obvious
950 yield match.start(0), "E721 do not compare types, use 'isinstance()'"
951
952
953def python_3000_has_key(logical_line):
954 r"""
    The {}.has_key() method is removed in Python 3.
    Use the 'in' operator instead.
957
958 Okay: if "alph" in d:\n print d["alph"]
959 W601: assert d.has_key('alph')
960 """
961 pos = logical_line.find('.has_key(')
962 if pos > -1:
963 yield pos, "W601 .has_key() is deprecated, use 'in'"
964
965
966def python_3000_raise_comma(logical_line):
967 """
968 When raising an exception, use "raise ValueError('message')"
969 instead of the older form "raise ValueError, 'message'".
970
971 The paren-using form is preferred because when the exception arguments
972 are long or include string formatting, you don't need to use line
973 continuation characters thanks to the containing parentheses. The older
974 form is removed in Python 3.
975
976 Okay: raise DummyError("Message")
977 W602: raise DummyError, "Message"
978 """
979 match = RAISE_COMMA_REGEX.match(logical_line)
980 if match and not RERAISE_COMMA_REGEX.match(logical_line):
981 yield match.start(1), "W602 deprecated form of raising exception"
982
983
984def python_3000_not_equal(logical_line):
985 """
986 != can also be written <>, but this is an obsolete usage kept for
987 backwards compatibility only. New code should always use !=.
988 The older syntax is removed in Python 3.
989
990 Okay: if a != 'no':
991 W603: if a <> 'no':
992 """
993 pos = logical_line.find('<>')
994 if pos > -1:
995 yield pos, "W603 '<>' is deprecated, use '!='"
996
997
998def python_3000_backticks(logical_line):
999 """
1000 Backticks are removed in Python 3.
1001 Use repr() instead.
1002
1003 Okay: val = repr(1 + 2)
1004 W604: val = `1 + 2`
1005 """
1006 pos = logical_line.find('`')
1007 if pos > -1:
1008 yield pos, "W604 backticks are deprecated, use 'repr()'"
1009
1010
1011##############################################################################
1012# Helper functions
1013##############################################################################
1014
1015
1016if '' == ''.encode():
1017 # Python 2: implicit encoding.
1018 def readlines(filename):
1019 f = open(filename)
1020 try:
1021 return f.readlines()
1022 finally:
1023 f.close()
1024
1025 isidentifier = re.compile(r'[a-zA-Z_]\w*').match
1026 stdin_get_value = sys.stdin.read
1027else:
1028 # Python 3
1029 def readlines(filename):
1030 f = open(filename, 'rb')
1031 try:
1032 coding, lines = tokenize.detect_encoding(f.readline)
1033 f = TextIOWrapper(f, coding, line_buffering=True)
1034 return [l.decode(coding) for l in lines] + f.readlines()
1035 except (LookupError, SyntaxError, UnicodeError):
1036 f.close()
1037 # Fall back if files are improperly declared
1038 f = open(filename, encoding='latin-1')
1039 return f.readlines()
1040 finally:
1041 f.close()
1042
1043 isidentifier = str.isidentifier
1044
1045 def stdin_get_value():
1046 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1047readlines.__doc__ = " Read the source code."
1048noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1049
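# A physical or logical line that carries a "# noqa" (or legacy "# nopep8")
# comment is skipped by the checks that call this search, for instance
# maximum_line_length above. An illustrative example:
#
#     url = 'http://example.com/a/very/long/path?q=1'  # noqa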
1050
1051def expand_indent(line):
1052 r"""
1053 Return the amount of indentation.
1054 Tabs are expanded to the next multiple of 8.
1055
1056 >>> expand_indent(' ')
1057 4
1058 >>> expand_indent('\t')
1059 8
1060 >>> expand_indent(' \t')
1061 8
1062 >>> expand_indent(' \t')
1063 8
1064 >>> expand_indent(' \t')
1065 16
1066 """
1067 if '\t' not in line:
1068 return len(line) - len(line.lstrip())
1069 result = 0
1070 for char in line:
1071 if char == '\t':
1072 result = result // 8 * 8 + 8
1073 elif char == ' ':
1074 result += 1
1075 else:
1076 break
1077 return result
1078
1079
1080def mute_string(text):
1081 """
1082 Replace contents with 'xxx' to prevent syntax matching.
1083
1084 >>> mute_string('"abc"')
1085 '"xxx"'
1086 >>> mute_string("'''abc'''")
1087 "'''xxx'''"
1088 >>> mute_string("r'abc'")
1089 "r'xxx'"
1090 """
1091 # String modifiers (e.g. u or r)
1092 start = text.index(text[-1]) + 1
1093 end = len(text) - 1
1094 # Triple quotes
1095 if text[-3:] in ('"""', "'''"):
1096 start += 2
1097 end -= 2
1098 return text[:start] + 'x' * (end - start) + text[end:]
1099
1100
1101def parse_udiff(diff, patterns=None, parent='.'):
1102 """Return a dictionary of matching lines."""
1103 # For each file of the diff, the entry key is the filename,
1104 # and the value is a set of row numbers to consider.
1105 rv = {}
1106 path = nrows = None
1107 for line in diff.splitlines():
1108 if nrows:
1109 if line[:1] != '-':
1110 nrows -= 1
1111 continue
1112 if line[:3] == '@@ ':
1113 hunk_match = HUNK_REGEX.match(line)
1114 row, nrows = [int(g or '1') for g in hunk_match.groups()]
1115 rv[path].update(range(row, row + nrows))
1116 elif line[:3] == '+++':
1117 path = line[4:].split('\t', 1)[0]
1118 if path[:2] == 'b/':
1119 path = path[2:]
1120 rv[path] = set()
1121 return dict([(os.path.join(parent, path), rows)
1122 for (path, rows) in rv.items()
1123 if rows and filename_match(path, patterns)])
1124
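# A small, self-checking illustration of parse_udiff (the diff text is
# hypothetical):
#
#     diff = '\n'.join(['--- a/example.py',
#                       '+++ b/example.py',
#                       '@@ -10,3 +12,4 @@'])
#     rows = parse_udiff(diff, patterns=['*.py'])
#     assert rows == {'./example.py': set([12, 13, 14, 15])}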
1125
1126def filename_match(filename, patterns, default=True):
1127 """
1128 Check if patterns contains a pattern that matches filename.
1129 If patterns is unspecified, this always returns True.
1130 """
1131 if not patterns:
1132 return default
1133 return any(fnmatch(filename, pattern) for pattern in patterns)
1134
1135
1136##############################################################################
1137# Framework to run all checks
1138##############################################################################
1139
1140
1141_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1142
1143
1144def register_check(check, codes=None):
1145 """
1146 Register a new check object.
1147 """
1148 def _add_check(check, kind, codes, args):
1149 if check in _checks[kind]:
1150 _checks[kind][check][0].extend(codes or [])
1151 else:
1152 _checks[kind][check] = (codes or [''], args)
1153 if inspect.isfunction(check):
1154 args = inspect.getargspec(check)[0]
1155 if args and args[0] in ('physical_line', 'logical_line'):
1156 if codes is None:
1157 codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
1158 _add_check(check, args[0], codes, args)
1159 elif inspect.isclass(check):
1160 if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:
1161 _add_check(check, 'tree', codes, None)
1162
1163
1164def init_checks_registry():
1165 """
1166 Register all globally visible functions where the first argument name
1167 is 'physical_line' or 'logical_line'.
1168 """
1169 mod = inspect.getmodule(register_check)
1170 for (name, function) in inspect.getmembers(mod, inspect.isfunction):
1171 register_check(function)
1172init_checks_registry()
1173
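# Checks defined in this module are registered automatically by
# init_checks_registry above. A sketch of registering an external check by
# hand (the function, its W999 code and its behaviour are hypothetical):
#
#     def check_no_eval(logical_line):
#         """W999 hypothetical check discouraging eval()."""
#         pos = logical_line.find('eval(')
#         if pos > -1:
#             yield pos, "W999 eval() is discouraged"
#
#     register_check(check_no_eval, codes=['W999'])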
1174
1175class Checker(object):
1176 """
1177 Load a Python source file, tokenize it, check coding style.
1178 """
1179
1180 def __init__(self, filename=None, lines=None,
1181 options=None, report=None, **kwargs):
1182 if options is None:
1183 options = StyleGuide(kwargs).options
1184 else:
1185 assert not kwargs
1186 self._io_error = None
1187 self._physical_checks = options.physical_checks
1188 self._logical_checks = options.logical_checks
1189 self._ast_checks = options.ast_checks
1190 self.max_line_length = options.max_line_length
1191 self.verbose = options.verbose
1192 self.filename = filename
1193 if filename is None:
1194 self.filename = 'stdin'
1195 self.lines = lines or []
1196 elif filename == '-':
1197 self.filename = 'stdin'
1198 self.lines = stdin_get_value().splitlines(True)
1199 elif lines is None:
1200 try:
1201 self.lines = readlines(filename)
1202 except IOError:
1203 exc_type, exc = sys.exc_info()[:2]
1204 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1205 self.lines = []
1206 else:
1207 self.lines = lines
1208 self.report = report or options.report
1209 self.report_error = self.report.error
1210
1211 def report_invalid_syntax(self):
1212 exc_type, exc = sys.exc_info()[:2]
1213 offset = exc.args[1]
1214 if len(offset) > 2:
1215 offset = offset[1:3]
1216 self.report_error(offset[0], offset[1] or 0,
1217 'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
1218 self.report_invalid_syntax)
1219 report_invalid_syntax.__doc__ = " Check if the syntax is valid."
1220
1221 def readline(self):
1222 """
1223 Get the next line from the input buffer.
1224 """
1225 self.line_number += 1
1226 if self.line_number > len(self.lines):
1227 return ''
1228 return self.lines[self.line_number - 1]
1229
1230 def readline_check_physical(self):
1231 """
1232 Check and return the next physical line. This method can be
1233 used to feed tokenize.generate_tokens.
1234 """
1235 line = self.readline()
1236 if line:
1237 self.check_physical(line)
1238 return line
1239
1240 def run_check(self, check, argument_names):
1241 """
1242 Run a check plugin.
1243 """
1244 arguments = []
1245 for name in argument_names:
1246 arguments.append(getattr(self, name))
1247 return check(*arguments)
1248
1249 def check_physical(self, line):
1250 """
1251 Run all physical checks on a raw input line.
1252 """
1253 self.physical_line = line
1254 if self.indent_char is None and line[:1] in WHITESPACE:
1255 self.indent_char = line[0]
1256 for name, check, argument_names in self._physical_checks:
1257 result = self.run_check(check, argument_names)
1258 if result is not None:
1259 offset, text = result
1260 self.report_error(self.line_number, offset, text, check)
1261
1262 def build_tokens_line(self):
1263 """
1264 Build a logical line from tokens.
1265 """
1266 self.mapping = []
1267 logical = []
1268 length = 0
1269 previous = None
1270 for token in self.tokens:
1271 token_type, text = token[0:2]
1272 if token_type in SKIP_TOKENS:
1273 continue
1274 if token_type == tokenize.STRING:
1275 text = mute_string(text)
1276 if previous:
1277 end_row, end = previous[3]
1278 start_row, start = token[2]
1279 if end_row != start_row: # different row
1280 prev_text = self.lines[end_row - 1][end - 1]
1281 if prev_text == ',' or (prev_text not in '{[('
1282 and text not in '}])'):
1283 logical.append(' ')
1284 length += 1
1285 elif end != start: # different column
1286 fill = self.lines[end_row - 1][end:start]
1287 logical.append(fill)
1288 length += len(fill)
1289 self.mapping.append((length, token))
1290 logical.append(text)
1291 length += len(text)
1292 previous = token
1293 self.logical_line = ''.join(logical)
1294 # With Python 2, if the line ends with '\r\r\n' the assertion fails
1295 # assert self.logical_line.strip() == self.logical_line
1296
1297 def check_logical(self):
1298 """
1299 Build a line from tokens and run all logical checks on it.
1300 """
1301 self.build_tokens_line()
1302 self.report.increment_logical_line()
1303 first_line = self.lines[self.mapping[0][1][2][0] - 1]
1304 indent = first_line[:self.mapping[0][1][2][1]]
1305 self.previous_indent_level = self.indent_level
1306 self.indent_level = expand_indent(indent)
1307 if self.verbose >= 2:
1308 print(self.logical_line[:80].rstrip())
1309 for name, check, argument_names in self._logical_checks:
1310 if self.verbose >= 4:
1311 print(' ' + name)
1312 for result in self.run_check(check, argument_names):
1313 offset, text = result
1314 if isinstance(offset, tuple):
1315 orig_number, orig_offset = offset
1316 else:
1317 for token_offset, token in self.mapping:
1318 if offset >= token_offset:
1319 orig_number = token[2][0]
1320 orig_offset = (token[2][1] + offset - token_offset)
1321 self.report_error(orig_number, orig_offset, text, check)
1322 self.previous_logical = self.logical_line
1323
1324 def check_ast(self):
1325 try:
1326 tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
1327 except SyntaxError:
1328 return self.report_invalid_syntax()
1329 for name, cls, _ in self._ast_checks:
1330 checker = cls(tree, self.filename)
1331 for lineno, offset, text, check in checker.run():
1332 if not noqa(self.lines[lineno - 1]):
1333 self.report_error(lineno, offset, text, check)
1334
1335 def generate_tokens(self):
1336 if self._io_error:
1337 self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
1338 tokengen = tokenize.generate_tokens(self.readline_check_physical)
1339 try:
1340 for token in tokengen:
1341 yield token
1342 except (SyntaxError, tokenize.TokenError):
1343 self.report_invalid_syntax()
1344
1345 def check_all(self, expected=None, line_offset=0):
1346 """
1347 Run all checks on the input file.
1348 """
1349 self.report.init_file(self.filename, self.lines, expected, line_offset)
1350 if self._ast_checks:
1351 self.check_ast()
1352 self.line_number = 0
1353 self.indent_char = None
1354 self.indent_level = 0
1355 self.previous_logical = ''
1356 self.tokens = []
1357 self.blank_lines = blank_lines_before_comment = 0
1358 parens = 0
1359 for token in self.generate_tokens():
1360 self.tokens.append(token)
1361 token_type, text = token[0:2]
1362 if self.verbose >= 3:
1363 if token[2][0] == token[3][0]:
1364 pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
1365 else:
1366 pos = 'l.%s' % token[3][0]
1367 print('l.%s\t%s\t%s\t%r' %
1368 (token[2][0], pos, tokenize.tok_name[token[0]], text))
1369 if token_type == tokenize.OP:
1370 if text in '([{':
1371 parens += 1
1372 elif text in '}])':
1373 parens -= 1
1374 elif not parens:
1375 if token_type == tokenize.NEWLINE:
1376 if self.blank_lines < blank_lines_before_comment:
1377 self.blank_lines = blank_lines_before_comment
1378 self.check_logical()
1379 self.tokens = []
1380 self.blank_lines = blank_lines_before_comment = 0
1381 elif token_type == tokenize.NL:
1382 if len(self.tokens) == 1:
1383 # The physical line contains only this token.
1384 self.blank_lines += 1
1385 self.tokens = []
1386 elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
1387 if blank_lines_before_comment < self.blank_lines:
1388 blank_lines_before_comment = self.blank_lines
1389 self.blank_lines = 0
1390 if COMMENT_WITH_NL:
1391 # The comment also ends a physical line
1392 self.tokens = []
1393 return self.report.get_file_results()
1394
1395
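# A minimal sketch of driving the Checker class directly (the file name is
# illustrative; keyword options such as show_source are forwarded to a
# StyleGuide when no options object is passed in):
#
#     checker = Checker('example.py', show_source=True)
#     error_count = checker.check_all()
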
1396class BaseReport(object):
1397 """Collect the results of the checks."""
1398 print_filename = False
1399
1400 def __init__(self, options):
1401 self._benchmark_keys = options.benchmark_keys
1402 self._ignore_code = options.ignore_code
1403 # Results
1404 self.elapsed = 0
1405 self.total_errors = 0
1406 self.counters = dict.fromkeys(self._benchmark_keys, 0)
1407 self.messages = {}
1408
1409 def start(self):
1410 """Start the timer."""
1411 self._start_time = time.time()
1412
1413 def stop(self):
1414 """Stop the timer."""
1415 self.elapsed = time.time() - self._start_time
1416
1417 def init_file(self, filename, lines, expected, line_offset):
1418 """Signal a new file."""
1419 self.filename = filename
1420 self.lines = lines
1421 self.expected = expected or ()
1422 self.line_offset = line_offset
1423 self.file_errors = 0
1424 self.counters['files'] += 1
1425 self.counters['physical lines'] += len(lines)
1426
1427 def increment_logical_line(self):
1428 """Signal a new logical line."""
1429 self.counters['logical lines'] += 1
1430
1431 def error(self, line_number, offset, text, check):
1432 """Report an error, according to options."""
1433 code = text[:4]
1434 if self._ignore_code(code):
1435 return
1436 if code in self.counters:
1437 self.counters[code] += 1
1438 else:
1439 self.counters[code] = 1
1440 self.messages[code] = text[5:]
1441 # Don't care about expected errors or warnings
1442 if code in self.expected:
1443 return
1444 if self.print_filename and not self.file_errors:
1445 print(self.filename)
1446 self.file_errors += 1
1447 self.total_errors += 1
1448 return code
1449
1450 def get_file_results(self):
1451 """Return the count of errors and warnings for this file."""
1452 return self.file_errors
1453
1454 def get_count(self, prefix=''):
1455 """Return the total count of errors and warnings."""
1456 return sum([self.counters[key]
1457 for key in self.messages if key.startswith(prefix)])
1458
1459 def get_statistics(self, prefix=''):
1460 """
1461 Get statistics for message codes that start with the prefix.
1462
1463 prefix='' matches all errors and warnings
1464 prefix='E' matches all errors
1465 prefix='W' matches all warnings
1466 prefix='E4' matches all errors that have to do with imports
1467 """
1468 return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])
1469 for key in sorted(self.messages) if key.startswith(prefix)]
1470
1471 def print_statistics(self, prefix=''):
1472 """Print overall statistics (number of errors and warnings)."""
1473 for line in self.get_statistics(prefix):
1474 print(line)
1475
1476 def print_benchmark(self):
1477 """Print benchmark numbers."""
1478 print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
1479 if self.elapsed:
1480 for key in self._benchmark_keys:
1481 print('%-7d %s per second (%d total)' %
1482 (self.counters[key] / self.elapsed, key,
1483 self.counters[key]))
1484
1485
1486class FileReport(BaseReport):
1487 """Collect the results of the checks and print only the filenames."""
1488 print_filename = True
1489
1490
1491class StandardReport(BaseReport):
1492 """Collect and print the results of the checks."""
1493
1494 def __init__(self, options):
1495 super(StandardReport, self).__init__(options)
1496 self._fmt = REPORT_FORMAT.get(options.format.lower(),
1497 options.format)
1498 self._repeat = options.repeat
1499 self._show_source = options.show_source
1500 self._show_pep8 = options.show_pep8
1501
1502 def init_file(self, filename, lines, expected, line_offset):
1503 """Signal a new file."""
1504 self._deferred_print = []
1505 return super(StandardReport, self).init_file(
1506 filename, lines, expected, line_offset)
1507
1508 def error(self, line_number, offset, text, check):
1509 """Report an error, according to options."""
1510 code = super(StandardReport, self).error(line_number, offset,
1511 text, check)
1512 if code and (self.counters[code] == 1 or self._repeat):
1513 self._deferred_print.append(
1514 (line_number, offset, code, text[5:], check.__doc__))
1515 return code
1516
1517 def get_file_results(self):
1518 """Print the result and return the overall count for this file."""
1519 self._deferred_print.sort()
1520 for line_number, offset, code, text, doc in self._deferred_print:
1521 print(self._fmt % {
1522 'path': self.filename,
1523 'row': self.line_offset + line_number, 'col': offset + 1,
1524 'code': code, 'text': text,
1525 })
1526 if self._show_source:
1527 if line_number > len(self.lines):
1528 line = ''
1529 else:
1530 line = self.lines[line_number - 1]
1531 print(line.rstrip())
1532 print(' ' * offset + '^')
1533 if self._show_pep8 and doc:
1534 print(doc.lstrip('\n').rstrip())
1535 return self.file_errors
1536
1537
1538class DiffReport(StandardReport):
1539 """Collect and print the results for the changed lines only."""
1540
1541 def __init__(self, options):
1542 super(DiffReport, self).__init__(options)
1543 self._selected = options.selected_lines
1544
1545 def error(self, line_number, offset, text, check):
1546 if line_number not in self._selected[self.filename]:
1547 return
1548 return super(DiffReport, self).error(line_number, offset, text, check)
1549
1550
1551class StyleGuide(object):
1552 """Initialize a PEP-8 instance with few options."""
1553
1554 def __init__(self, *args, **kwargs):
1555 # build options from the command line
1556 self.checker_class = kwargs.pop('checker_class', Checker)
1557 parse_argv = kwargs.pop('parse_argv', False)
1558 config_file = kwargs.pop('config_file', None)
1559 parser = kwargs.pop('parser', None)
1560 options, self.paths = process_options(
1561 parse_argv=parse_argv, config_file=config_file, parser=parser)
1562 if args or kwargs:
1563 # build options from dict
1564 options_dict = dict(*args, **kwargs)
1565 options.__dict__.update(options_dict)
1566 if 'paths' in options_dict:
1567 self.paths = options_dict['paths']
1568
1569 self.runner = self.input_file
1570 self.options = options
1571
1572 if not options.reporter:
1573 options.reporter = BaseReport if options.quiet else StandardReport
1574
1575 for index, value in enumerate(options.exclude):
1576 options.exclude[index] = value.rstrip('/')
1577 # Ignore all checks which are not explicitly selected
1578 options.select = tuple(options.select or ())
1579 options.ignore = tuple(options.ignore or options.select and ('',))
1580 options.benchmark_keys = BENCHMARK_KEYS[:]
1581 options.ignore_code = self.ignore_code
1582 options.physical_checks = self.get_checks('physical_line')
1583 options.logical_checks = self.get_checks('logical_line')
1584 options.ast_checks = self.get_checks('tree')
1585 self.init_report()
1586
1587 def init_report(self, reporter=None):
1588 """Initialize the report instance."""
1589 self.options.report = (reporter or self.options.reporter)(self.options)
1590 return self.options.report
1591
1592 def check_files(self, paths=None):
1593 """Run all checks on the paths."""
1594 if paths is None:
1595 paths = self.paths
1596 report = self.options.report
1597 runner = self.runner
1598 report.start()
1599 try:
1600 for path in paths:
1601 if os.path.isdir(path):
1602 self.input_dir(path)
1603 elif not self.excluded(path):
1604 runner(path)
1605 except KeyboardInterrupt:
1606 print('... stopped')
1607 report.stop()
1608 return report
1609
1610 def input_file(self, filename, lines=None, expected=None, line_offset=0):
1611 """Run all checks on a Python source file."""
1612 if self.options.verbose:
1613 print('checking %s' % filename)
1614 fchecker = self.checker_class(
1615 filename, lines=lines, options=self.options)
1616 return fchecker.check_all(expected=expected, line_offset=line_offset)
1617
1618 def input_dir(self, dirname):
1619 """Check all files in this directory and all subdirectories."""
1620 dirname = dirname.rstrip('/')
1621 if self.excluded(dirname):
1622 return 0
1623 counters = self.options.report.counters
1624 verbose = self.options.verbose
1625 filepatterns = self.options.filename
1626 runner = self.runner
1627 for root, dirs, files in os.walk(dirname):
1628 if verbose:
1629 print('directory ' + root)
1630 counters['directories'] += 1
1631 for subdir in sorted(dirs):
1632 if self.excluded(os.path.join(root, subdir)):
1633 dirs.remove(subdir)
1634 for filename in sorted(files):
1635 # contain a pattern that matches?
1636 if ((filename_match(filename, filepatterns) and
1637 not self.excluded(filename))):
1638 runner(os.path.join(root, filename))
1639
1640 def excluded(self, filename):
1641 """
1642 Check if options.exclude contains a pattern that matches filename.
1643 """
1644 basename = os.path.basename(filename)
1645 return any((filename_match(filename, self.options.exclude,
1646 default=False),
1647 filename_match(basename, self.options.exclude,
1648 default=False)))
1649
1650 def ignore_code(self, code):
1651 """
1652 Check if the error code should be ignored.
1653
1654 If 'options.select' contains a prefix of the error code,
1655 return False. Else, if 'options.ignore' contains a prefix of
1656 the error code, return True.
1657 """
1658 return (code.startswith(self.options.ignore) and
1659 not code.startswith(self.options.select))
1660
1661 def get_checks(self, argument_name):
1662 """
1663 Find all globally visible functions where the first argument name
1664 starts with argument_name and which contain selected tests.
1665 """
1666 checks = []
1667 for check, attrs in _checks[argument_name].items():
1668 (codes, args) = attrs
1669 if any(not (code and self.ignore_code(code)) for code in codes):
1670 checks.append((check.__name__, check, args))
1671 return sorted(checks)
1672
1673
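# A minimal library-usage sketch of the StyleGuide class (the module is
# assumed to be importable as ``pep8``; paths and option values are
# illustrative):
#
#     import pep8
#     style = pep8.StyleGuide(quiet=True, max_line_length=99)
#     report = style.check_files(['src/'])
#     if report.total_errors:
#         print('\n'.join(report.get_statistics('E')))
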
1674def get_parser(prog='pep8', version=__version__):
1675 parser = OptionParser(prog=prog, version=version,
1676 usage="%prog [options] input ...")
1677 parser.config_options = [
1678 'exclude', 'filename', 'select', 'ignore', 'max-line-length', 'count',
1679 'format', 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']
1680 parser.add_option('-v', '--verbose', default=0, action='count',
1681 help="print status messages, or debug with -vv")
1682 parser.add_option('-q', '--quiet', default=0, action='count',
1683 help="report only file names, or nothing with -qq")
1684 parser.add_option('-r', '--repeat', default=True, action='store_true',
1685 help="(obsolete) show all occurrences of the same error")
1686 parser.add_option('--first', action='store_false', dest='repeat',
1687 help="show first occurrence of each error")
1688 parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
1689 help="exclude files or directories which match these "
1690 "comma separated patterns (default: %default)")
1691 parser.add_option('--filename', metavar='patterns', default='*.py',
1692 help="when parsing directories, only check filenames "
1693 "matching these comma separated patterns "
1694 "(default: %default)")
1695 parser.add_option('--select', metavar='errors', default='',
1696 help="select errors and warnings (e.g. E,W6)")
1697 parser.add_option('--ignore', metavar='errors', default='',
1698 help="skip errors and warnings (e.g. E4,W)")
1699 parser.add_option('--show-source', action='store_true',
1700 help="show source code for each error")
1701 parser.add_option('--show-pep8', action='store_true',
1702 help="show text of PEP 8 for each error "
1703 "(implies --first)")
1704 parser.add_option('--statistics', action='store_true',
1705 help="count errors and warnings")
1706 parser.add_option('--count', action='store_true',
1707 help="print total number of errors and warnings "
1708 "to standard error and set exit code to 1 if "
1709 "total is not null")
1710 parser.add_option('--max-line-length', type='int', metavar='n',
1711 default=MAX_LINE_LENGTH,
1712 help="set maximum allowed line length "
1713 "(default: %default)")
1714 parser.add_option('--format', metavar='format', default='default',
1715 help="set the error format [default|pylint|<custom>]")
1716 parser.add_option('--diff', action='store_true',
1717 help="report only lines changed according to the "
1718 "unified diff received on STDIN")
1719 group = parser.add_option_group("Testing Options")
1720 if os.path.exists(TESTSUITE_PATH):
1721 group.add_option('--testsuite', metavar='dir',
1722 help="run regression tests from dir")
1723 group.add_option('--doctest', action='store_true',
1724 help="run doctest on myself")
1725 group.add_option('--benchmark', action='store_true',
1726 help="measure processing speed")
1727 return parser
1728
1729
1730def read_config(options, args, arglist, parser):
1731 """Read both user configuration and local configuration."""
1732 config = RawConfigParser()
1733
1734 user_conf = options.config
1735 if user_conf and os.path.isfile(user_conf):
1736 if options.verbose:
1737 print('user configuration: %s' % user_conf)
1738 config.read(user_conf)
1739
1740 parent = tail = args and os.path.abspath(os.path.commonprefix(args))
1741 while tail:
1742 for name in PROJECT_CONFIG:
1743 local_conf = os.path.join(parent, name)
1744 if os.path.isfile(local_conf):
1745 break
1746 else:
1747 parent, tail = os.path.split(parent)
1748 continue
1749 if options.verbose:
1750 print('local configuration: %s' % local_conf)
1751 config.read(local_conf)
1752 break
1753
1754 pep8_section = parser.prog
1755 if config.has_section(pep8_section):
1756 option_list = dict([(o.dest, o.type or o.action)
1757 for o in parser.option_list])
1758
1759 # First, read the default values
1760 new_options, _ = parser.parse_args([])
1761
1762 # Second, parse the configuration
1763 for opt in config.options(pep8_section):
1764 if options.verbose > 1:
1765 print(" %s = %s" % (opt, config.get(pep8_section, opt)))
1766 if opt.replace('_', '-') not in parser.config_options:
1767 print("Unknown option: '%s'\n not in [%s]" %
1768 (opt, ' '.join(parser.config_options)))
1769 sys.exit(1)
1770 normalized_opt = opt.replace('-', '_')
1771 opt_type = option_list[normalized_opt]
1772 if opt_type in ('int', 'count'):
1773 value = config.getint(pep8_section, opt)
1774 elif opt_type == 'string':
1775 value = config.get(pep8_section, opt)
1776 else:
1777 assert opt_type in ('store_true', 'store_false')
1778 value = config.getboolean(pep8_section, opt)
1779 setattr(new_options, normalized_opt, value)
1780
1781 # Third, overwrite with the command-line options
1782 options, _ = parser.parse_args(arglist, values=new_options)
1783 options.doctest = options.testsuite = False
1784 return options
1785
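# A minimal sketch of a project configuration read by read_config() above,
# taken from the [pep8] section of tox.ini, setup.cfg or a .pep8 file in a
# parent folder of the checked paths (option names mirror the long command
# line flags; the values below are illustrative):
#
#     [pep8]
#     ignore = E226,E302
#     max-line-length = 100
#     statistics = True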
1786
1787def process_options(arglist=None, parse_argv=False, config_file=None,
1788 parser=None):
1789 """Process options passed either via arglist or via command line args."""
1790 if not arglist and not parse_argv:
1791 # Don't read the command line if the module is used as a library.
1792 arglist = []
1793 if not parser:
1794 parser = get_parser()
1795 if not parser.has_option('--config'):
1796 if config_file is True:
1797 config_file = DEFAULT_CONFIG
1798 group = parser.add_option_group("Configuration", description=(
1799 "The project options are read from the [%s] section of the "
1800 "tox.ini file or the setup.cfg file located in any parent folder "
1801 "of the path(s) being processed. Allowed options are: %s." %
1802 (parser.prog, ', '.join(parser.config_options))))
1803 group.add_option('--config', metavar='path', default=config_file,
1804 help="user config file location (default: %default)")
1805 options, args = parser.parse_args(arglist)
1806 options.reporter = None
1807
1808 if options.ensure_value('testsuite', False):
1809 args.append(options.testsuite)
1810 elif not options.ensure_value('doctest', False):
1811 if parse_argv and not args:
1812 if options.diff or any(os.path.exists(name)
1813 for name in PROJECT_CONFIG):
1814 args = ['.']
1815 else:
1816 parser.error('input not specified')
1817 options = read_config(options, args, arglist, parser)
1818 options.reporter = parse_argv and options.quiet == 1 and FileReport
1819
1820 if options.filename:
1821 options.filename = options.filename.split(',')
1822 options.exclude = options.exclude.split(',')
1823 if options.select:
1824 options.select = options.select.split(',')
1825 if options.ignore:
1826 options.ignore = options.ignore.split(',')
1827 elif not (options.select or
1828 options.testsuite or options.doctest) and DEFAULT_IGNORE:
1829 # The default choice: ignore controversial checks
1830 # (for doctest and testsuite, all checks are required)
1831 options.ignore = DEFAULT_IGNORE.split(',')
1832
1833 if options.diff:
1834 options.reporter = DiffReport
1835 stdin = stdin_get_value()
1836 options.selected_lines = parse_udiff(stdin, options.filename, args[0])
1837 args = sorted(options.selected_lines)
1838
1839 return options, args
1840
1841
1842def _main():
1843 """Parse options and run checks on Python source."""
1844 pep8style = StyleGuide(parse_argv=True, config_file=True)
1845 options = pep8style.options
1846 if options.doctest or options.testsuite:
1847 sys.path[:0] = [TESTSUITE_PATH]
1848 from test_pep8 import run_tests
1849 del sys.path[0]
1850 report = run_tests(pep8style, options.doctest, options.testsuite)
1851 else:
1852 report = pep8style.check_files()
1853 if options.statistics:
1854 report.print_statistics()
1855 if options.benchmark:
1856 report.print_benchmark()
1857 if options.testsuite and not options.quiet:
1858 report.print_results()
1859 if report.total_errors:
1860 if options.count:
1861 sys.stderr.write(str(report.total_errors) + '\n')
1862 sys.exit(1)
1863
1864if __name__ == '__main__':
1865 _main()