blob: f605f189fab31af0edff9b3b076104bc3e61d91c [file] [log] [blame]
Tor Norbye3a2425a2013-11-04 10:16:08 -08001#!/usr/bin/env python
2# pep8.py - Check Python source code formatting, according to PEP 8
3# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
Tor Norbye2e5965e2014-07-25 12:24:15 -07004# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
Tor Norbye3a2425a2013-11-04 10:16:08 -08005#
6# Permission is hereby granted, free of charge, to any person
7# obtaining a copy of this software and associated documentation files
8# (the "Software"), to deal in the Software without restriction,
9# including without limitation the rights to use, copy, modify, merge,
10# publish, distribute, sublicense, and/or sell copies of the Software,
11# and to permit persons to whom the Software is furnished to do so,
12# subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be
15# included in all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24# SOFTWARE.
25
26r"""
Tor Norbye2e5965e2014-07-25 12:24:15 -070027Check Python source code formatting, according to PEP 8.
Tor Norbye3a2425a2013-11-04 10:16:08 -080028
29For usage and a list of options, try this:
30$ python pep8.py -h
31
32This program and its regression test suite live here:
33http://github.com/jcrocholl/pep8
34
35Groups of errors and warnings:
36E errors
37W warnings
38100 indentation
39200 whitespace
40300 blank lines
41400 imports
42500 line length
43600 deprecation
44700 statements
45900 syntax error
46"""
Tor Norbye2e5965e2014-07-25 12:24:15 -070047from __future__ import with_statement
48
49__version__ = '1.5.7'
Tor Norbye3a2425a2013-11-04 10:16:08 -080050
51import os
52import sys
53import re
54import time
55import inspect
56import keyword
57import tokenize
58from optparse import OptionParser
59from fnmatch import fnmatch
60try:
61 from configparser import RawConfigParser
62 from io import TextIOWrapper
63except ImportError:
64 from ConfigParser import RawConfigParser
65
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
DEFAULT_IGNORE = 'E123,E226,E24'
# Per-user configuration file: a dot-file in the home directory on Windows,
# otherwise a file under $XDG_CONFIG_HOME (falling back to ~/.config).
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# The brackets are escaped inside the character sets: a set that starts with
# a bare '[' makes newer versions of `re` emit a "possible nested set"
# FutureWarning.  The set of matched strings is unchanged.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;:]')
# The alternative must be TWO spaces (or a tab): with a single space the
# pattern would match every conventionally spaced separator such as "1, 2".
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
116
117
118##############################################################################
119# Plugins (check functions) for physical lines
120##############################################################################
121
122
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    Spaces-only indentation is the most common convention and tabs-only
    the runner-up.  A file that mixes both should be converted to spaces
    exclusively.  Running the interpreter with -t warns about illegal
    mixtures and -tt turns those warnings into errors; both options are
    highly recommended.

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    leading_ws = INDENT_REGEX.match(physical_line).group(1)
    for position, character in enumerate(leading_ws):
        if character == indent_char:
            continue
        # First indentation character that differs from the expected one.
        return position, "E101 indentation contains mixed spaces and tabs"
140
141
def tabs_obsolete(physical_line):
    r"""For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading_ws = INDENT_REGEX.match(physical_line).group(1)
    tab_position = leading_ws.find('\t')
    if tab_position >= 0:
        # Report at the first tab found in the indentation.
        return tab_position, "W191 indentation contains tabs"
151
152
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    A different warning is produced for a line that holds nothing but
    whitespace, so that people who like to indent their blank lines can
    filter that case separately.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    content = physical_line
    # Drop the line terminators one kind at a time, in this order:
    # newline chr(10), carriage return chr(13), form feed chr(12).
    for terminator in ('\n', '\r', '\x0c'):
        content = content.rstrip(terminator)
    core = content.rstrip(' \t\v')
    if core == content:
        return None
    if core:
        return len(core), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
172
173
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    if line_number != total_lines:
        # Only the very last physical line is of interest.
        return None
    content = physical_line.rstrip()
    if not content:
        return 0, "W391 blank line at end of file"
    if content == physical_line:
        # Nothing was stripped, so the line has no terminating newline.
        return len(physical_line), "W292 no newline at end of file"
Tor Norbye3a2425a2013-11-04 10:16:08 -0800188
189
def maximum_line_length(physical_line, max_line_length, multiline):
    r"""Limit all lines to a maximum of 79 characters.

    Plenty of devices are still limited to 80 character lines, and keeping
    windows at 80 characters lets several of them sit side-by-side.  The
    default wrapping on such devices looks ugly, so please keep every line
    within 79 characters.  For long flowing text (docstrings or comments) a
    limit of 72 characters is recommended.

    Reports error E501.
    """
    stripped = physical_line.rstrip()
    width = len(stripped)
    if width <= max_line_length or noqa(stripped):
        return None

    # Special case for long URLs in multi-line docstrings or comments,
    # but still report the error when the 72 first chars are whitespaces.
    words = stripped.split()
    lone_word = (len(words) == 1 and multiline)
    commented_word = (len(words) == 2 and words[0] == '#')
    if lone_word or commented_word:
        if len(stripped) - len(words[-1]) < max_line_length - 7:
            return None

    if hasattr(stripped, 'decode'):   # Python 2
        # The line could contain multi-byte characters
        try:
            width = len(stripped.decode('utf-8'))
        except UnicodeError:
            pass
    if width > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (width, max_line_length))
221
222
223##############################################################################
224# Plugins (check functions) for logical lines
225##############################################################################
226
227
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical, previous_indent_level):
    r"""Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Additional blank lines may be used (sparingly) to set apart groups of
    related functions, and may be left out between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Inside functions, use blank lines sparingly to mark logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # Don't expect blank lines before the first line

    if previous_logical.startswith('@'):
        # Nothing may separate a decorator from what it decorates.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
        return

    if blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
        return

    if not logical_line.startswith(('def ', 'class ', '@')):
        return

    if not indent_level:
        # Top-level definition: exactly two blank lines are expected.
        if blank_before != 2:
            yield 0, "E302 expected 2 blank lines, found %d" % blank_before
        return

    # Nested definition: fine when separated by a blank line, when it opens
    # a deeper block, or when it directly follows a docstring.
    if blank_before or previous_indent_level < indent_level:
        return
    if not DOCSTRING_REGEX.match(previous_logical):
        yield 0, "E301 expected 1 blank line, found 0"
Tor Norbye3a2425a2013-11-04 10:16:08 -0800263
264
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for hit in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        fragment = hit.group()
        symbol = fragment.strip()
        position = hit.start()
        if fragment == symbol + ' ':
            # The space follows the symbol, so the symbol is an opener.
            yield position + 1, "E201 whitespace after '%s'" % symbol
            continue
        if logical_line[position - 1] == ',':
            # A space that merely follows a comma is not reported here.
            continue
        if symbol in '}])':
            code = 'E202'
        else:
            code = 'E203'   # symbol is one of ',;:'
        yield position, "%s whitespace before '%s'" % (code, symbol)
295
296
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    # (regex group, message for a tab, message for several spaces); the
    # group before the keyword is examined first, then the one after it.
    group_checks = (
        (1, "E274 tab before keyword", "E272 multiple spaces before keyword"),
        (2, "E273 tab after keyword", "E271 multiple spaces after keyword"),
    )
    for hit in KEYWORD_REGEX.finditer(logical_line):
        for group, tab_message, spaces_message in group_checks:
            gap = hit.group(group)
            if '\t' in gap:
                yield hit.start(group), tab_message
            elif len(gap) > 1:
                yield hit.start(group), spaces_message
318
319
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    # Walk over every character together with the one that follows it; the
    # final character has no follower and is therefore never examined.
    neighbours = zip(logical_line, logical_line[1:])
    for index, (char, follower) in enumerate(neighbours):
        if char not in ',;:' or follower in WHITESPACE:
            continue
        if char == ',' and follower == ')':
            continue  # Allow tuple with only one element: (3,)
        if char == ':':
            prefix = logical_line[:index]
            unclosed_bracket = prefix.count('[') > prefix.count(']')
            if unclosed_bracket and prefix.rfind('{') < prefix.rfind('['):
                continue  # Slice syntax, no space required
        yield index, "E231 missing whitespace after '%s'" % char
344
345
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_level % 4 and indent_char == ' ':
        yield 0, "E111 indentation is not a multiple of four"

    opens_block = previous_logical.endswith(':')
    deeper = indent_level > previous_indent_level
    if opens_block:
        # A line ending with a colon must be followed by a deeper line.
        if not deeper:
            yield 0, "E112 expected an indented block"
    elif deeper:
        yield 0, "E113 unexpected indentation"
370
371
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        # Suppressed, or the logical line fits on one physical line.
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents: maps a column to True, to `str`, or to a token text
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # The tail of a multi-line token, and NL/NEWLINE tokens, do
            # not begin a continuation line.
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                # A hang already used at this depth must be repeated.
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            # A colon that ends its physical line also counts as an
            # opening row for hanging indents.
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for outer in range(depth):
                    if indent[outer] > prev_indent:
                        indent[outer] = 0
                # Iterate over a copy of the keys: entries are deleted.
                for column in list(indent_chances):
                    if column >= prev_indent:
                        del indent_chances[column]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # Un-count the bracket on the nearest row, scanning upwards,
                # that still has an open one.
                for earlier in range(row, -1, -1):
                    if parens[earlier]:
                        parens[earlier] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            # The row where a multi-line token ends inherits the relative
            # indent of the row where it started.
            rel_indent[end[0] - first_row] = rel_indent[row]

    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
Tor Norbye3a2425a2013-11-04 10:16:08 -0800567
568
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index in range(1, len(tokens)):
        token_type, text, start, end, __ = tokens[index]
        opener = token_type == tokenize.OP and text in '(['
        if opener and start != prev_end:
            # Something separates the bracket from the previous token.
            after_operand = (prev_type == tokenize.NAME or
                             prev_text in '}])')
            # Syntax "class A (B):" is allowed, but avoid it
            after_class = index >= 2 and tokens[index - 2][1] == 'class'
            # Allow "return (a.foo for a in range(5))"
            if (after_operand and not after_class and
                    not keyword.iskeyword(prev_text)):
                yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
599
600
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for hit in OPERATOR_REGEX.finditer(logical_line):
        leading, trailing = hit.groups()
        leading_at = hit.start(1)
        trailing_at = hit.start(2)

        # Whitespace in front of the operator: a tab outranks extra spaces.
        if '\t' in leading:
            yield leading_at, "E223 tab before operator"
        elif len(leading) > 1:
            yield leading_at, "E221 multiple spaces before operator"

        # Whitespace behind the operator, same precedence.
        if '\t' in trailing:
            yield trailing_at, "E224 tab after operator"
        elif len(trailing) > 1:
            yield trailing_at, "E222 multiple spaces after operator"
622
623
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    parens = 0
    # need_space is False (nothing pending), True (a space is mandatory
    # after the operator just seen), None (transient: optional operator
    # found) or a tuple (position before the operator, whether a space
    # preceded it).
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # No space on either side of an optional operator:
                    # choose the code from the operator's family.
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    else:
                        code, optype = 'E226', 'arithmetic'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP:
                    used_as_binary = prev_text in '}])'
                else:
                    used_as_binary = prev_text not in KEYWORDS
                if used_as_binary:
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
714
715
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for hit in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        matched = hit.group()
        separator = matched[0]
        if '\t' in matched:
            template = "E242 tab after '%s'"
        else:
            template = "E241 multiple spaces after '%s'"
        # Report at the character right after the separator.
        yield hit.start() + 1, template % separator
732
733
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: foo(bar=(1, 2)); x = 1

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E251: foo(bar=(1, 2), baz = 3)
    """
    parens = 0          # current nesting depth of round parentheses
    no_space = False    # True right after an '=' seen inside parentheses
    prev_end = None
    message = "E251 unexpected spaces around keyword / parameter equals"
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            # This token directly follows a keyword '=': it must touch it.
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        # Deliberately a separate `if`, not `elif`: the token following the
        # '=' may itself be a parenthesis, as in foo(bar=(1, 2)), and must
        # still be counted or the depth goes out of balance for the rest of
        # the logical line.
        if token_type == tokenize.OP:
            if text == '(':
                parens += 1
            elif text == ')':
                parens -= 1
            elif parens and text == '=':
                no_space = True
                if start != prev_end:
                    yield (prev_end, message)
        prev_end = end
771
772
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement. Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    last_code_end = (0, 0)  # end of the latest token that is not COMMENT/NL
    for token_type, text, start, end, line in tokens:
        if token_type != tokenize.COMMENT:
            if token_type != tokenize.NL:
                last_code_end = end
            continue
        row, col = start
        # Non-blank text before the comment makes it an inline comment.
        is_inline = line[:col].strip()
        if (is_inline and last_code_end[0] == row and
                col < last_code_end[1] + 2):
            yield (last_code_end,
                   "E261 at least two spaces before inline comment")
        marker, _, remainder = text.partition(' ')
        bad_prefix = marker not in ('#', '#:')
        if is_inline:
            if bad_prefix or remainder[:1].isspace():
                yield start, "E262 inline comment should start with '# '"
        elif (bad_prefix and text.rstrip('#') and
                (row > 1 or marker[1] != '!')):
            # A '#!' comment on the first row is not reported.
            yield start, "E265 block comment should start with '# '"
809
810
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    # A comma that comes after a ';' belongs to a later statement.
    if comma > -1 and ';' not in logical_line[:comma]:
        yield comma, "E401 multiple imports on one line"
828
829
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    final = len(logical_line) - 1

    # Pass 1: every colon that is not the last character of the line.
    colon = logical_line.find(':')
    while -1 < colon < final:
        head = logical_line[:colon]
        inside_brackets = (
            head.count('{') > head.count('}') or    # {'a': 1} (dict)
            head.count('[') > head.count(']') or    # [1:2] (slice)
            head.count('(') > head.count(')'))      # (Python 3 annotation)
        if not inside_brackets and not LAMBDA_REGEX.search(head):
            # Not a "lambda x: x" colon either
            yield colon, "E701 multiple statements on one line (colon)"
        colon = logical_line.find(':', colon + 1)

    # Pass 2: every semicolon; a trailing one gets its own code.
    semicolon = logical_line.find(';')
    while semicolon > -1:
        if semicolon >= final:
            yield semicolon, "E703 statement ends with a semicolon"
        else:
            yield semicolon, "E702 multiple statements on one line (semicolon)"
        semicolon = logical_line.find(';', semicolon + 1)
Tor Norbye3a2425a2013-11-04 10:16:08 -0800872
873
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces. Long lines can be
    broken over multiple lines by wrapping expressions in parentheses. These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so the notes live in comments.
    #
    # logical_line -- unused here; its name is what the registry inspects.
    # tokens       -- 5-tuples (type, text, start, end, line) as produced
    #                 by the tokenize module for this logical line.
    # Yields ((row, col), message) for each backslash that ends a physical
    # line while at least one bracket is open.
    prev_start = prev_end = parens = 0
    # Position of the trailing backslash on the current physical line, or
    # None.  Initialised explicitly so the first test below never depends
    # on short-circuit evaluation to avoid an unbound name.
    backslash = None
    for token_type, text, start, end, line in tokens:
        if start[0] != prev_start and parens and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # First token ending on a new row: inspect its physical line.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.COMMENT:
            # A comment runs to the end of the physical line, so a trailing
            # backslash on this line is comment text, not a line join.
            backslash = None
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
906
907
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value. The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    if noqa:
        return
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    operator, singleton = match.group(1, 2)
    is_equality = (operator == '==')
    negation = '' if is_equality else 'not '
    advice = "'if cond is %s:'" % (negation + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # "== True" and "!= False" both mean the plain truth test.
        if singleton == 'True':
            plain_test = is_equality
        else:
            plain_test = (singleton == 'False' and not is_equality)
        advice += " or 'if %scond:'" % ('' if plain_test else 'not ')
    description = "%s comparison to %s should be %s" % (code, singleton,
                                                        advice)
    yield match.start(1), description
937
938
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not match:
        return
    offset = match.start(1)
    # Group 2 holds the operator that was negated.
    if match.group(2) != 'in':
        yield offset, "E714 test for object identity should be 'is not'"
    else:
        yield offset, "E713 test for membership should be 'not in'"
958
959
def comparison_type(logical_line):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    operand = match.group(1)
    # Comparison is allowed for types which are not obvious: a captured
    # operand that is an identifier and not one of SINGLETONS.
    obvious = (not operand or not isidentifier(operand) or
               operand in SINGLETONS)
    if obvious:
        yield match.start(), "E721 do not compare types, use 'isinstance()'"
Tor Norbye3a2425a2013-11-04 10:16:08 -0800981
982
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    if noqa:
        return
    offset = logical_line.find('.has_key(')
    if offset != -1:
        yield offset, "W601 .has_key() is deprecated, use 'in'"
992
993
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    # Lines that also match RERAISE_COMMA_REGEX are not reported.
    if RERAISE_COMMA_REGEX.match(logical_line):
        return
    # Point at the last character of the match.
    yield match.end() - 1, "W602 deprecated form of raising exception"
Tor Norbye3a2425a2013-11-04 10:16:08 -08001005
1006
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    # Only the first occurrence on the logical line is reported.
    if '<>' in logical_line:
        yield logical_line.index('<>'), "W603 '<>' is deprecated, use '!='"
1018
1019
def python_3000_backticks(logical_line):
    r"""Backticks are removed in Python 3: use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    # The docstring above is read at runtime (error codes and self-test
    # cases are extracted from it), so it is kept as is.
    # Only the first backtick on the logical line is reported.
    if '`' in logical_line:
        yield logical_line.index('`'), "W604 backticks are deprecated, use 'repr()'"
1029
1030
1031##############################################################################
1032# Helper functions
1033##############################################################################
1034
1035
# Select the interpreter-specific helpers.  On Python 2 ''.encode() is
# still a str and compares equal to ''; on Python 3 it is bytes and does not.
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """Read the source code.

        Return the lines of *filename* as a list, opened in 'rU' mode.
        """
        with open(filename, 'rU') as f:
            return f.readlines()
    # NOTE: .match only anchors at the start of the string, so this is
    # truthy for any string that *begins* with an identifier.
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Read the source code.

        Return the lines of *filename* as a list of str, decoded with the
        encoding reported by tokenize.detect_encoding(); if that fails,
        re-read the file as latin-1.
        """
        try:
            with open(filename, 'rb') as f:
                # detect_encoding() consumes some lines while sniffing;
                # they come back as bytes in `lines`.
                (coding, lines) = tokenize.detect_encoding(f.readline)
                # Read the remainder of the file as text.
                f = TextIOWrapper(f, coding, line_buffering=True)
                return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Return all of standard input as text, ignoring decode errors."""
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Search function for a "# noqa" / "# nopep8" marker (case-insensitive).
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1062
1063
def expand_indent(line):
    r"""Return the width of the leading indentation of *line*.

    A tab advances to the next multiple of 8 columns.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    if '\t' not in line:
        # Without tabs the width is simply the stripped-off prefix length.
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == ' ':
            width += 1
        elif char == '\t':
            # Jump to the next tab stop.
            width = (width // 8 + 1) * 8
        else:
            break
    return width
1089
1090
def mute_string(text):
    """Replace the contents of a string literal with 'x' characters.

    The prefix (e.g. u or r) and the quotes are kept, and the length of
    the literal is unchanged, so other checks do not match inside it.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    quote = text[-1]
    # Width of the delimiter: three characters for triple quotes, else one.
    delimiter = 3 if text[-3:] in ('"""', "'''") else 1
    # The first occurrence of the quote character marks the end of any
    # string modifiers.
    opening = text.index(quote) + delimiter
    closing = len(text) - delimiter
    return ''.join((text[:opening], 'x' * (closing - opening), text[closing:]))
1109
1110
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary mapping file paths to sets of row numbers.

    *diff* is the text of a unified diff.  For each '+++' file header a
    set is started, and each '@@ ' hunk header adds the rows it covers.
    Only files with at least one row and accepted by filename_match()
    against *patterns* are returned; keys are joined onto *parent*.
    """
    rows_by_file = {}
    current = None    # path taken from the latest '+++' header
    pending = None    # lines of the current hunk still to be skipped
    for text in diff.splitlines():
        if pending:
            # Inside a hunk body: removed lines do not count.
            if text[:1] != '-':
                pending -= 1
            continue
        marker = text[:3]
        if marker == '@@ ':
            # A group that did not take part in the match defaults to 1.
            first, pending = [int(group or '1')
                              for group in HUNK_REGEX.match(text).groups()]
            rows_by_file[current].update(range(first, first + pending))
        elif marker == '+++':
            current = text[4:].split('\t', 1)[0]
            if current[:2] == 'b/':
                current = current[2:]
            rows_by_file[current] = set()
    selected = {}
    for name, rows in rows_by_file.items():
        if rows and filename_match(name, patterns):
            selected[os.path.join(parent, name)] = rows
    return selected
1134
1135
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    value  -- a comma-separated string; a list or any falsy value is
              returned unchanged.
    parent -- directory that entries containing a '/' are resolved
              against.

    Return a list of paths.  Surrounding whitespace is removed from each
    entry, so "a, b" yields ['a', 'b'].  Entries containing a '/' are
    made absolute; other entries are kept as they are.  A trailing '/'
    is dropped.
    """
    if not value or isinstance(value, list):
        return value
    paths = []
    for path in value.split(','):
        # Whitespace after the comma is not part of the path.
        path = path.strip()
        if '/' in path:
            path = os.path.abspath(os.path.join(parent, path))
        paths.append(path.rstrip('/'))
    return paths
1149
1150
def filename_match(filename, patterns, default=True):
    """Tell whether any pattern in *patterns* matches *filename*.

    When *patterns* is empty or None, *default* is returned instead.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
1159
1160
if COMMENT_WITH_NL:
    def _is_eol_token(token):
        """Tell whether *token* ends a physical line.

        Besides the NEWLINE token types, a COMMENT token whose text is
        equal to its whole physical line also counts.
        """
        kind = token[0]
        if kind in NEWLINE:
            return True
        return kind == tokenize.COMMENT and token[1] == token[4]
else:
    def _is_eol_token(token):
        """Tell whether *token* ends a physical line."""
        kind = token[0]
        return kind in NEWLINE
1168
1169
Tor Norbye3a2425a2013-11-04 10:16:08 -08001170##############################################################################
1171# Framework to run all checks
1172##############################################################################
1173
1174
# Registry of checks, by kind.  Each inner dict maps a check object to a
# tuple (codes, argument names); tree checks store None for the names.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}


def register_check(check, codes=None):
    """Register a new check object.

    check -- a function whose first parameter is named 'physical_line' or
             'logical_line', or a class whose __init__ starts with the
             parameters (self, tree).  Anything else is ignored.
    codes -- optional list of error codes handled by the check.  For
             functions it defaults to the codes found in the docstring.

    Registering the same check again extends its list of codes.
    """
    def _parameter_names(function):
        # inspect.getargspec() was removed in Python 3.11; prefer
        # inspect.signature() where it exists and keep getargspec() only
        # as the fallback for interpreters without it.
        signature = getattr(inspect, 'signature', None)
        if signature is None:
            return inspect.getargspec(function)[0]
        return [parameter.name
                for parameter in signature(function).parameters.values()
                if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]

    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)

    if inspect.isfunction(check):
        args = _parameter_names(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _parameter_names(check.__init__)[:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
1194
1195
def init_checks_registry():
    """Offer every function of this module to register_check().

    register_check() keeps only the functions whose first argument name
    is either 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    for _name, member in inspect.getmembers(module, inspect.isfunction):
        register_check(member)


init_checks_registry()
1205
1206
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        """Collect the options and the source lines to check.

        filename -- path of the file; None or '-' are reported as 'stdin'
                    ('-' reads the lines from standard input).
        lines    -- source lines; when omitted they are read from
                    *filename*.
        options  -- options object; when None, one is built with
                    StyleGuide(kwargs).  Extra keyword arguments are only
                    accepted in that case.
        report   -- report object; defaults to options.report.
        """
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.multiline = False  # in a multiline string?
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                # Remember the failure; generate_tokens() reports it (E902).
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        # The first line may be an empty string (e.g. lines obtained with
        # str.splitlines() from text starting with a blank line); indexing
        # its first character would raise IndexError.
        if self.lines and self.lines[0]:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        """Report the exception currently being handled as E901.

        The position is taken from the second exception argument when
        there is one, otherwise (1, 0) is used.
        """
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer.

        Return '' once all lines are consumed.  The first line found to
        start with a character in WHITESPACE sets self.indent_char.
        """
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin.

        Each name in *argument_names* is looked up as an attribute of
        this checker and passed positionally to *check*.
        """
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                if text[:4] == 'E101':
                    # After an E101 report, take this line's first
                    # character as the indent character.
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens.

        Set self.logical_line (string contents muted, comments left out)
        and self.noqa, and return the mapping: a list of
        (offset in the logical line, (row, col) in the source) pairs.
        The mapping is None when every token was skipped.
        """
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    # Join rows with a single space, except right after
                    # an opening bracket or right before a closing one.
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    # Keep the original text found between the tokens.
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()
        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # Translate an offset in the logical line into a
                    # (row, col) position using the token mapping.
                    for token_offset, pos in mapping:
                        if offset <= token_offset:
                            break
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Lines carrying a noqa marker are not reported.
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                # Stop at tokens that start past the last source line.
                if token[2][0] > self.total_lines:
                    return
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token):
        """If appropriate (based on token), check current physical line(s)."""
        # Called after every token, but act only on end of line.
        if _is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it contains
            #   the magical "# noqa" comment, we disable all physical
            #   checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            for line in token[1].split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file.

        *expected* and *line_offset* are handed to the report's
        init_file().  Return the report's get_file_results().
        """
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                # Logical lines can only end outside of brackets.
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
                elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
                    if len(self.tokens) == 1:
                        # The comment also ends a physical line
                        token = list(token)
                        token[1] = text.rstrip('\r\n')
                        token[3] = (token[2][0], token[2][1] + len(token[1]))
                        self.tokens = [tuple(token)]
                        self.check_logical()
        if self.tokens:
            # Tokens are left over after the last token was read: check
            # the last line and the pending logical line.
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
1458
1459
class BaseReport(object):
    """Collect the results of the checks."""

    # When true, error() prints the filename on the first counted error
    # of each file.
    print_filename = False

    def __init__(self, options):
        """Keep the options needed later and reset all results."""
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Results
        self.elapsed = 0
        self.total_errors = 0
        self.messages = {}
        self.counters = dict.fromkeys(self._benchmark_keys, 0)

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        finished = time.time()
        self.elapsed = finished - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options.

        Return the error code, or None when the code is ignored or is
        one of the expected codes.
        """
        code = text[:4]
        if self._ignore_code(code):
            return
        if code not in self.counters:
            # First occurrence: remember the message text as well.
            self.counters[code] = 0
            self.messages[code] = text[5:]
        self.counters[code] += 1
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        total = 0
        for code in self.messages:
            if code.startswith(prefix):
                total += self.counters[code]
        return total

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        statistics = []
        for code in sorted(self.messages):
            if not code.startswith(prefix):
                continue
            row = (self.counters[code], code, self.messages[code])
            statistics.append('%-7s %s %s' % row)
        return statistics

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for entry in self.get_statistics(prefix):
            print(entry)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            # No rate can be computed without an elapsed time.
            return
        for key in self._benchmark_keys:
            total = self.counters[key]
            print('%-7d %s per second (%d total)' %
                  (total / self.elapsed, key, total))
1548
1549
class FileReport(BaseReport):
    """Collect the results of the checks, printing only the filenames."""

    # Makes the inherited error() print the name of each file on the
    # first error counted for it.
    print_filename = True
1553
1554
class StandardReport(BaseReport):
    """Collect the results of the checks and print them file by file."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        format_name = options.format
        # A known format name (case-insensitive) selects a predefined
        # format string; anything else is used as the format string itself.
        self._fmt = REPORT_FORMAT.get(format_name.lower(), format_name)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Start a new file with an empty queue of messages to print."""
        self._deferred_print = []
        base = super(StandardReport, self)
        return base.init_file(filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Count the error and queue it for printing, according to options.

        Without the 'repeat' option only the first occurrence of each
        code is queued.
        """
        base = super(StandardReport, self)
        code = base.error(line_number, offset, text, check)
        if not code:
            return code
        if self._repeat or self.counters[code] == 1:
            entry = (line_number, offset, code, text[5:], check.__doc__)
            self._deferred_print.append(entry)
        return code

    def get_file_results(self):
        """Print the queued messages and return this file's error count."""
        self._deferred_print.sort()
        for row, col, code, message, doc in self._deferred_print:
            fields = {
                'path': self.filename,
                'row': self.line_offset + row,
                'col': col + 1,
                'code': code,
                'text': message,
            }
            print(self._fmt % fields)
            if self._show_source:
                # The reported line may lie past the end of the file
                if row <= len(self.lines):
                    source_line = self.lines[row - 1]
                else:
                    source_line = ''
                print(source_line.rstrip())
                # Blank out the text before the offset (keeping tabs and
                # other whitespace) so that the caret lines up.
                print(re.sub(r'\S', ' ', source_line[:col]) + '^')
            if self._show_pep8 and doc:
                print(' ' + doc.strip())
        return self.file_errors
1600
1601
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Looked up by filename in error(); each value is tested for
        # membership of the reported line number.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        """Report an error only if it is on a selected line.

        Return the error code when the error is reported.  Return None
        when the line is not selected for the current file, or when the
        current file has no entry at all in the selected lines.
        """
        try:
            selected_rows = self._selected[self.filename]
        except KeyError:
            # A file missing from the diff has no changed line: nothing
            # to report, rather than a crash.
            return
        if line_number not in selected_rows:
            return
        return super(DiffReport, self).error(line_number, offset, text, check)
1613
1614
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        """Build the options, select the checks and create the report.

        The keyword arguments 'checker_class', 'parse_argv', 'config_file'
        and 'parser' are consumed here.  All the remaining arguments are
        collected with dict(*args, **kwargs) and copied onto the options;
        a 'paths' entry also becomes the list of paths to check.
        """
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        if options_dict:
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance.

        Instantiate 'reporter' (or 'options.reporter' when it is not
        given) with the options, store it as 'options.report' and
        return it.
        """
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths and return the report.

        The paths default to 'self.paths'.  Directories are walked with
        input_dir(); other paths are passed to the runner unless they
        are excluded.  A KeyboardInterrupt stops the run early; the
        report is still stopped and returned.
        """
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file.

        Return the value of the checker's check_all() method.
        """
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories.

        Return 0 without walking anything when the directory itself is
        excluded.
        """
        # Drop trailing slashes, but keep the name untouched when it
        # consists of slashes only: stripping the root directory '/' would
        # leave '', and os.walk('') does not yield any directory.
        dirname = dirname.rstrip('/') or dirname
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Iterate over a sorted copy: 'dirs' is modified in place so
            # that os.walk() does not descend into excluded directories.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern that matches
        either the base name of filename, or its absolute path (with
        filename joined to 'parent' first, when parent is given).
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        # A code shorter than 4 characters is itself a prefix: keep it as
        # soon as one selected code starts with it.
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.

        Return a sorted list of (name, check, args) tuples.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not ignored;
            # an empty code is never considered ignored.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
1748
1749
def get_parser(prog='pep8', version=__version__):
    """Create the option parser holding the command-line options.

    The returned parser carries an extra 'config_options' attribute: the
    list of option names which read_config() accepts from a
    configuration file.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']

    # General options, in the order they are listed by --help
    add = parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', default=True, action='store_true',
        help="(obsolete) show all occurrences of the same error")
    add('--first', action='store_false', dest='repeat',
        help="show first occurrence of each error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %default)")
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns "
             "(default: %default)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error "
             "(implies --first)")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--max-line-length', type='int', metavar='n',
        default=MAX_LINE_LENGTH,
        help="set maximum allowed line length "
             "(default: %default)")
    add('--hang-closing', action='store_true',
        help="hang closing bracket instead of matching "
             "indentation of opening bracket's line")
    add('--format', metavar='format', default='default',
        help="set the error format [default|pylint|<custom>]")
    add('--diff', action='store_true',
        help="report only lines changed according to the "
             "unified diff received on STDIN")

    testing = parser.add_option_group("Testing Options")
    # The test runner options are only offered when the test suite exists
    if os.path.exists(TESTSUITE_PATH):
        testing.add_option('--testsuite', metavar='dir',
                           help="run regression tests from dir")
        testing.add_option('--doctest', action='store_true',
                           help="run doctest on myself")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return parser
1808
1809
def read_config(options, args, arglist, parser):
    """Merge the user and the project configuration into the options.

    The user configuration is the file named by 'options.config'.  The
    project configuration is searched in the common prefix of 'args'
    and then in each of its parent directories.  When the section named
    after 'parser.prog' exists, its values are applied on top of the
    parser defaults, and 'arglist' is parsed again on top of them.
    """
    config = RawConfigParser()

    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    local_dir = os.curdir
    if args:
        # Walk up from the common prefix until a project file is read
        parent = tail = os.path.abspath(os.path.commonprefix(args))
        while tail:
            candidates = [os.path.join(parent, fn) for fn in PROJECT_CONFIG]
            if config.read(candidates):
                local_dir = parent
                if options.verbose:
                    print('local configuration: in %s' % parent)
                break
            parent, tail = os.path.split(parent)

    section = parser.prog
    if config.has_section(section):
        # Map each option destination to its type (or to its action)
        option_types = {}
        for option in parser.option_list:
            option_types[option.dest] = option.type or option.action

        # First, read the default values
        new_options, __ = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(section):
            if opt.replace('_', '-') not in parser.config_options:
                print(" unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print(" %s = %s" % (opt, config.get(section, opt)))
            dest = opt.replace('-', '_')
            kind = option_types[dest]
            if kind in ('int', 'count'):
                value = config.getint(section, opt)
            elif kind == 'string':
                value = config.get(section, opt)
                if dest == 'exclude':
                    value = normalize_paths(value, local_dir)
            else:
                assert kind in ('store_true', 'store_false')
                value = config.getboolean(section, opt)
            setattr(new_options, dest, value)

        # Third, overwrite with the command-line options
        options, __ = parser.parse_args(arglist, values=new_options)

    options.doctest = options.testsuite = False
    return options
1862
1863
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args.

    Return the tuple (options, args).
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        if config_file is True:
            config_file = DEFAULT_CONFIG
        description = (
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed. Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options)))
        config_group = parser.add_option_group("Configuration",
                                               description=description)
        config_group.add_option(
            '--config', metavar='path', default=config_file,
            help="user config file location (default: %default)")
    # Don't read the command line if the module is used as a library.
    if not (arglist or parse_argv):
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    options, args = parser.parse_args(arglist)
    options.reporter = None

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # Without any input, fall back on the current directory when
            # checking a diff or when a project configuration is present
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    def split_commas(value):
        # A false value (no option given) is passed through unchanged
        return value and value.split(',')

    options.filename = split_commas(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = split_commas(options.select)
    options.ignore = split_commas(options.ignore)

    if options.diff:
        options.reporter = DiffReport
        diff_text = stdin_get_value()
        options.selected_lines = parse_udiff(
            diff_text, options.filename, args[0])
        # Check the files named by the diff only
        args = sorted(options.selected_lines)

    return options, args
1911
1912
def _main():
    """Parse the command line, run the checks and exit with the status."""
    import signal

    def exit_on_broken_pipe(signum, frame):
        sys.exit(1)

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, exit_on_broken_pipe)
    except AttributeError:
        pass  # not supported on Windows

    style_guide = StyleGuide(parse_argv=True, config_file=True)
    options = style_guide.options
    if options.doctest or options.testsuite:
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()

    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()

    if not report.total_errors:
        return
    # Errors were found: optionally print their number, then exit with 1
    if options.count:
        sys.stderr.write(str(report.total_errors) + '\n')
    sys.exit(1)
1940
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    _main()