#!/usr/bin/env python
# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

r"""
Check Python source code formatting, according to PEP 8:
http://www.python.org/dev/peps/pep-0008/

For usage and a list of options, try this:
$ python pep8.py -h

This program and its regression test suite live here:
http://github.com/jcrocholl/pep8

Groups of errors and warnings:
E errors
W warnings
100 indentation
200 whitespace
300 blank lines
400 imports
500 line length
600 deprecation
700 statements
900 syntax error
"""
__version__ = '1.4.5a0'
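
# Typical command line invocations (the file and directory names below are
# illustrative only):
#
#     $ python pep8.py --first example.py
#     $ python pep8.py --show-source --show-pep8 example.py
#     $ python pep8.py --statistics -qq src/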
49
50import os
51import sys
52import re
53import time
54import inspect
55import keyword
56import tokenize
57from optparse import OptionParser
58from fnmatch import fnmatch
59try:
60 from configparser import RawConfigParser
61 from io import TextIOWrapper
62except ImportError:
63 from ConfigParser import RawConfigParser
64
65DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
66DEFAULT_IGNORE = 'E226,E24'
67if sys.platform == 'win32':
68 DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
69else:
70 DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
71 os.path.expanduser('~/.config'), 'pep8')
72PROJECT_CONFIG = ('.pep8', 'tox.ini', 'setup.cfg')
73TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
74MAX_LINE_LENGTH = 79
75REPORT_FORMAT = {
76 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
77 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
78}
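
# With the 'default' format above, a reported problem renders roughly as
# (values are illustrative):
#
#     example.py:42:10: E401 multiple imports on one line
#
# The --format option accepts 'default', 'pylint' or a custom string built
# from the same %(path)s, %(row)d, %(col)d, %(code)s and %(text)s keys.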
79
80PyCF_ONLY_AST = 1024
81SINGLETONS = frozenset(['False', 'None', 'True'])
82KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
83UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
84ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
85WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
86WS_NEEDED_OPERATORS = frozenset([
87 '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
88 '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
89WHITESPACE = frozenset(' \t')
90SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
91 tokenize.INDENT, tokenize.DEDENT])
92BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
93
94INDENT_REGEX = re.compile(r'([ \t]*)')
95RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
96RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
97ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
98DOCSTRING_REGEX = re.compile(r'u?r?["\']')
99EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
100WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
101COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
102COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
103 r'|\s*\(\s*([^)]*[^ )])\s*\))')
104KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
105OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
106LAMBDA_REGEX = re.compile(r'\blambda\b')
107HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
108
109# Work around Python < 2.6 behaviour, which does not generate NL after
110# a comment which is on a line by itself.
111COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
112
113
114##############################################################################
115# Plugins (check functions) for physical lines
116##############################################################################
117
118
119def tabs_or_spaces(physical_line, indent_char):
120 r"""
121 Never mix tabs and spaces.
122
123 The most popular way of indenting Python is with spaces only. The
124 second-most popular way is with tabs only. Code indented with a mixture
125 of tabs and spaces should be converted to using spaces exclusively. When
126 invoking the Python command line interpreter with the -t option, it issues
127 warnings about code that illegally mixes tabs and spaces. When using -tt
128 these warnings become errors. These options are highly recommended!
129
130 Okay: if a == 0:\n a = 1\n b = 1
131 E101: if a == 0:\n a = 1\n\tb = 1
132 """
133 indent = INDENT_REGEX.match(physical_line).group(1)
134 for offset, char in enumerate(indent):
135 if char != indent_char:
136 return offset, "E101 indentation contains mixed spaces and tabs"
137
138
139def tabs_obsolete(physical_line):
140 r"""
141 For new projects, spaces-only are strongly recommended over tabs. Most
142 editors have features that make this easy to do.
143
144 Okay: if True:\n return
145 W191: if True:\n\treturn
146 """
147 indent = INDENT_REGEX.match(physical_line).group(1)
148 if '\t' in indent:
149 return indent.index('\t'), "W191 indentation contains tabs"
150
151
152def trailing_whitespace(physical_line):
153 r"""
154 JCR: Trailing whitespace is superfluous.
155 FBM: Except when it occurs as part of a blank line (i.e. the line is
156 nothing but whitespace). According to Python docs[1] a line with only
157 whitespace is considered a blank line, and is to be ignored. However,
158 matching a blank line to its indentation level avoids mistakenly
159 terminating a multi-line statement (e.g. class declaration) when
160 pasting code into the standard Python interpreter.
161
162 [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines
163
    The warning returned varies depending on whether the line itself is
    blank, for easier filtering for those who want to indent their blank
    lines.
166
167 Okay: spam(1)\n#
168 W291: spam(1) \n#
169 W293: class Foo(object):\n \n bang = 12
170 """
171 physical_line = physical_line.rstrip('\n') # chr(10), newline
172 physical_line = physical_line.rstrip('\r') # chr(13), carriage return
173 physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L
174 stripped = physical_line.rstrip(' \t\v')
175 if physical_line != stripped:
176 if stripped:
177 return len(stripped), "W291 trailing whitespace"
178 else:
179 return 0, "W293 blank line contains whitespace"
180
181
182def trailing_blank_lines(physical_line, lines, line_number):
183 r"""
184 JCR: Trailing blank lines are superfluous.
185
186 Okay: spam(1)
187 W391: spam(1)\n
188 """
189 if not physical_line.rstrip() and line_number == len(lines):
190 return 0, "W391 blank line at end of file"
191
192
193def missing_newline(physical_line):
194 """
195 JCR: The last line should have a newline.
196
197 Reports warning W292.
198 """
199 if physical_line.rstrip() == physical_line:
200 return len(physical_line), "W292 no newline at end of file"
201
202
203def maximum_line_length(physical_line, max_line_length):
204 """
205 Limit all lines to a maximum of 79 characters.
206
207 There are still many devices around that are limited to 80 character
208 lines; plus, limiting windows to 80 characters makes it possible to have
209 several windows side-by-side. The default wrapping on such devices looks
210 ugly. Therefore, please limit all lines to a maximum of 79 characters.
211 For flowing long blocks of text (docstrings or comments), limiting the
212 length to 72 characters is recommended.
213
214 Reports error E501.
215 """
216 line = physical_line.rstrip()
217 length = len(line)
218 if length > max_line_length:
219 if noqa(line):
220 return
221 if hasattr(line, 'decode'): # Python 2
222 # The line could contain multi-byte characters
223 try:
224 length = len(line.decode('utf-8'))
225 except UnicodeError:
226 pass
227 if length > max_line_length:
228 return (max_line_length, "E501 line too long "
229 "(%d > %d characters)" % (length, max_line_length))
230
231
232##############################################################################
233# Plugins (check functions) for logical lines
234##############################################################################
235
236
237def blank_lines(logical_line, blank_lines, indent_level, line_number,
238 previous_logical, previous_indent_level):
239 r"""
240 Separate top-level function and class definitions with two blank lines.
241
242 Method definitions inside a class are separated by a single blank line.
243
244 Extra blank lines may be used (sparingly) to separate groups of related
245 functions. Blank lines may be omitted between a bunch of related
246 one-liners (e.g. a set of dummy implementations).
247
248 Use blank lines in functions, sparingly, to indicate logical sections.
249
250 Okay: def a():\n pass\n\n\ndef b():\n pass
251 Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass
252
253 E301: class Foo:\n b = 0\n def bar():\n pass
254 E302: def a():\n pass\n\ndef b(n):\n pass
255 E303: def a():\n pass\n\n\n\ndef b(n):\n pass
256 E303: def a():\n\n\n\n pass
257 E304: @decorator\n\ndef a():\n pass
258 """
259 if line_number < 3 and not previous_logical:
260 return # Don't expect blank lines before the first line
261 if previous_logical.startswith('@'):
262 if blank_lines:
263 yield 0, "E304 blank lines found after function decorator"
264 elif blank_lines > 2 or (indent_level and blank_lines == 2):
265 yield 0, "E303 too many blank lines (%d)" % blank_lines
266 elif logical_line.startswith(('def ', 'class ', '@')):
267 if indent_level:
268 if not (blank_lines or previous_indent_level < indent_level or
269 DOCSTRING_REGEX.match(previous_logical)):
270 yield 0, "E301 expected 1 blank line, found 0"
271 elif blank_lines != 2:
272 yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
273
274
275def extraneous_whitespace(logical_line):
276 """
277 Avoid extraneous whitespace in the following situations:
278
279 - Immediately inside parentheses, brackets or braces.
280
281 - Immediately before a comma, semicolon, or colon.
282
283 Okay: spam(ham[1], {eggs: 2})
284 E201: spam( ham[1], {eggs: 2})
285 E201: spam(ham[ 1], {eggs: 2})
286 E201: spam(ham[1], { eggs: 2})
287 E202: spam(ham[1], {eggs: 2} )
288 E202: spam(ham[1 ], {eggs: 2})
289 E202: spam(ham[1], {eggs: 2 })
290
291 E203: if x == 4: print x, y; x, y = y , x
292 E203: if x == 4: print x, y ; x, y = y, x
293 E203: if x == 4 : print x, y; x, y = y, x
294 """
295 line = logical_line
296 for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
297 text = match.group()
298 char = text.strip()
299 found = match.start()
300 if text == char + ' ':
301 # assert char in '([{'
302 yield found + 1, "E201 whitespace after '%s'" % char
303 elif line[found - 1] != ',':
304 code = ('E202' if char in '}])' else 'E203') # if char in ',;:'
305 yield found, "%s whitespace before '%s'" % (code, char)
306
307
308def whitespace_around_keywords(logical_line):
309 r"""
310 Avoid extraneous whitespace around keywords.
311
    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
317 """
318 for match in KEYWORD_REGEX.finditer(logical_line):
319 before, after = match.groups()
320
321 if '\t' in before:
322 yield match.start(1), "E274 tab before keyword"
323 elif len(before) > 1:
324 yield match.start(1), "E272 multiple spaces before keyword"
325
326 if '\t' in after:
327 yield match.start(2), "E273 tab after keyword"
328 elif len(after) > 1:
329 yield match.start(2), "E271 multiple spaces after keyword"
330
331
332def missing_whitespace(logical_line):
333 """
334 JCR: Each comma, semicolon or colon should be followed by whitespace.
335
336 Okay: [a, b]
337 Okay: (3,)
338 Okay: a[1:4]
339 Okay: a[:4]
340 Okay: a[1:]
341 Okay: a[1:4:2]
342 E231: ['a','b']
343 E231: foo(bar,baz)
344 E231: [{'a':'b'}]
345 """
346 line = logical_line
347 for index in range(len(line) - 1):
348 char = line[index]
349 if char in ',;:' and line[index + 1] not in WHITESPACE:
350 before = line[:index]
351 if char == ':' and before.count('[') > before.count(']') and \
352 before.rfind('{') < before.rfind('['):
353 continue # Slice syntax, no space required
354 if char == ',' and line[index + 1] == ')':
355 continue # Allow tuple with only one element: (3,)
356 yield index, "E231 missing whitespace after '%s'" % char
357
358
359def indentation(logical_line, previous_logical, indent_char,
360 indent_level, previous_indent_level):
361 r"""
362 Use 4 spaces per indentation level.
363
364 For really old code that you don't want to mess up, you can continue to
365 use 8-space tabs.
366
    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
376 """
377 if indent_char == ' ' and indent_level % 4:
378 yield 0, "E111 indentation is not a multiple of four"
379 indent_expect = previous_logical.endswith(':')
380 if indent_expect and indent_level <= previous_indent_level:
381 yield 0, "E112 expected an indented block"
382 if indent_level > previous_indent_level and not indent_expect:
383 yield 0, "E113 unexpected indentation"
384
385
386def continuation_line_indentation(logical_line, tokens, indent_level, verbose):
387 r"""
388 Continuation lines should align wrapped elements either vertically using
389 Python's implicit line joining inside parentheses, brackets and braces, or
390 using a hanging indent.
391
392 When using a hanging indent the following considerations should be applied:
393
394 - there should be no arguments on the first line, and
395
396 - further indentation should be used to clearly distinguish itself as a
397 continuation line.
398
    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (a or\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n       42)
    E128: a = (24,\n    42)
411 """
412 first_row = tokens[0][2][0]
413 nrows = 1 + tokens[-1][2][0] - first_row
414 if nrows == 1 or noqa(tokens[0][4]):
415 return
416
417 # indent_next tells us whether the next block is indented; assuming
418 # that it is indented by 4 spaces, then we should not allow 4-space
419 # indents on the final continuation line; in turn, some other
420 # indents are allowed to have an extra 4 spaces.
421 indent_next = logical_line.endswith(':')
422
423 row = depth = 0
424 # remember how many brackets were opened on each line
425 parens = [0] * nrows
426 # relative indents of physical lines
427 rel_indent = [0] * nrows
428 # visual indents
429 indent_chances = {}
430 last_indent = tokens[0][2]
431 indent = [last_indent[1]]
432 if verbose >= 3:
433 print(">>> " + tokens[0][4].rstrip())
434
435 for token_type, text, start, end, line in tokens:
436
437 newline = row < start[0] - first_row
438 if newline:
439 row = start[0] - first_row
440 newline = (not last_token_multiline and
441 token_type not in (tokenize.NL, tokenize.NEWLINE))
442
443 if newline:
444 # this is the beginning of a continuation line.
445 last_indent = start
446 if verbose >= 3:
447 print("... " + line.rstrip())
448
449 # record the initial indent.
450 rel_indent[row] = expand_indent(line) - indent_level
451
452 if depth:
453 # a bracket expression in a continuation line.
454 # find the line that it was opened on
455 for open_row in range(row - 1, -1, -1):
456 if parens[open_row]:
457 break
458 else:
459 # an unbracketed continuation line (ie, backslash)
460 open_row = 0
461 hang = rel_indent[row] - rel_indent[open_row]
462 visual_indent = indent_chances.get(start[1])
463
464 if token_type == tokenize.OP and text in ']})':
465 # this line starts with a closing bracket
466 if indent[depth]:
467 if start[1] != indent[depth]:
468 yield (start, "E124 closing bracket does not match "
469 "visual indentation")
470 elif hang:
471 yield (start, "E123 closing bracket does not match "
472 "indentation of opening bracket's line")
473 elif visual_indent is True:
474 # visual indent is verified
475 if not indent[depth]:
476 indent[depth] = start[1]
477 elif visual_indent in (text, str):
478 # ignore token lined up with matching one from a previous line
479 pass
480 elif indent[depth] and start[1] < indent[depth]:
481 # visual indent is broken
482 yield (start, "E128 continuation line "
483 "under-indented for visual indent")
484 elif hang == 4 or (indent_next and rel_indent[row] == 8):
485 # hanging indent is verified
486 pass
487 else:
488 # indent is broken
489 if hang <= 0:
490 error = "E122", "missing indentation or outdented"
491 elif indent[depth]:
492 error = "E127", "over-indented for visual indent"
493 elif hang % 4:
494 error = "E121", "indentation is not a multiple of four"
495 else:
496 error = "E126", "over-indented for hanging indent"
497 yield start, "%s continuation line %s" % error
498
499 # look for visual indenting
500 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
501 and not indent[depth]):
502 indent[depth] = start[1]
503 indent_chances[start[1]] = True
504 if verbose >= 4:
505 print("bracket depth %s indent to %s" % (depth, start[1]))
506 # deal with implicit string concatenation
507 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
508 text in ('u', 'ur', 'b', 'br')):
509 indent_chances[start[1]] = str
510 # special case for the "if" statement because len("if (") == 4
511 elif not indent_chances and not row and not depth and text == 'if':
512 indent_chances[end[1] + 1] = True
513
514 # keep track of bracket depth
515 if token_type == tokenize.OP:
516 if text in '([{':
517 depth += 1
518 indent.append(0)
519 parens[row] += 1
520 if verbose >= 4:
521 print("bracket depth %s seen, col %s, visual min = %s" %
522 (depth, start[1], indent[depth]))
523 elif text in ')]}' and depth > 0:
524 # parent indents should not be more than this one
525 prev_indent = indent.pop() or last_indent[1]
526 for d in range(depth):
527 if indent[d] > prev_indent:
528 indent[d] = 0
529 for ind in list(indent_chances):
530 if ind >= prev_indent:
531 del indent_chances[ind]
532 depth -= 1
533 if depth:
534 indent_chances[indent[depth]] = True
535 for idx in range(row, -1, -1):
536 if parens[idx]:
537 parens[idx] -= 1
538 break
539 assert len(indent) == depth + 1
540 if start[1] not in indent_chances:
541 # allow to line up tokens
542 indent_chances[start[1]] = text
543
544 last_token_multiline = (start[0] != end[0])
545
546 if indent_next and rel_indent[-1] == 4:
547 yield (last_indent, "E125 continuation line does not distinguish "
548 "itself from next logical line")
549
550
551def whitespace_before_parameters(logical_line, tokens):
552 """
553 Avoid extraneous whitespace in the following situations:
554
555 - Immediately before the open parenthesis that starts the argument
556 list of a function call.
557
558 - Immediately before the open parenthesis that starts an indexing or
559 slicing.
560
561 Okay: spam(1)
562 E211: spam (1)
563
564 Okay: dict['key'] = list[index]
565 E211: dict ['key'] = list[index]
566 E211: dict['key'] = list [index]
567 """
568 prev_type = tokens[0][0]
569 prev_text = tokens[0][1]
570 prev_end = tokens[0][3]
571 for index in range(1, len(tokens)):
572 token_type, text, start, end, line = tokens[index]
573 if (token_type == tokenize.OP and
574 text in '([' and
575 start != prev_end and
576 (prev_type == tokenize.NAME or prev_text in '}])') and
577 # Syntax "class A (B):" is allowed, but avoid it
578 (index < 2 or tokens[index - 2][1] != 'class') and
579 # Allow "return (a.foo for a in range(5))"
580 not keyword.iskeyword(prev_text)):
581 yield prev_end, "E211 whitespace before '%s'" % text
582 prev_type = token_type
583 prev_text = text
584 prev_end = end
585
586
587def whitespace_around_operator(logical_line):
588 r"""
589 Avoid extraneous whitespace in the following situations:
590
591 - More than one space around an assignment (or other) operator to
592 align it with another.
593
    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
599 """
600 for match in OPERATOR_REGEX.finditer(logical_line):
601 before, after = match.groups()
602
603 if '\t' in before:
604 yield match.start(1), "E223 tab before operator"
605 elif len(before) > 1:
606 yield match.start(1), "E221 multiple spaces before operator"
607
608 if '\t' in after:
609 yield match.start(2), "E224 tab after operator"
610 elif len(after) > 1:
611 yield match.start(2), "E222 multiple spaces after operator"
612
613
614def missing_whitespace_around_operator(logical_line, tokens):
615 r"""
616 - Always surround these binary operators with a single space on
617 either side: assignment (=), augmented assignment (+=, -= etc.),
618 comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
619 Booleans (and, or, not).
620
621 - Use spaces around arithmetic operators.
622
623 Okay: i = i + 1
624 Okay: submitted += 1
625 Okay: x = x * 2 - 1
626 Okay: hypot2 = x * x + y * y
627 Okay: c = (a + b) * (a - b)
628 Okay: foo(bar, key='word', *args, **kwargs)
629 Okay: alpha[:-i]
630
631 E225: i=i+1
632 E225: submitted +=1
633 E225: x = x /2 - 1
634 E225: z = x **y
635 E226: c = (a+b) * (a-b)
636 E226: hypot2 = x*x + y*y
637 E227: c = a|b
638 E228: msg = fmt%(errno, errmsg)
639 """
640 parens = 0
641 need_space = False
642 prev_type = tokenize.OP
643 prev_text = prev_end = None
644 for token_type, text, start, end, line in tokens:
645 if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
646 # ERRORTOKEN is triggered by backticks in Python 3
647 continue
648 if text in ('(', 'lambda'):
649 parens += 1
650 elif text == ')':
651 parens -= 1
652 if need_space:
653 if start != prev_end:
654 # Found a (probably) needed space
655 if need_space is not True and not need_space[1]:
656 yield (need_space[0],
657 "E225 missing whitespace around operator")
658 need_space = False
659 elif text == '>' and prev_text in ('<', '-'):
660 # Tolerate the "<>" operator, even if running Python 3
661 # Deal with Python 3's annotated return value "->"
662 pass
663 else:
664 if need_space is True or need_space[1]:
665 # A needed trailing space was not found
666 yield prev_end, "E225 missing whitespace around operator"
667 else:
668 code, optype = 'E226', 'arithmetic'
669 if prev_text == '%':
670 code, optype = 'E228', 'modulo'
671 elif prev_text not in ARITHMETIC_OP:
672 code, optype = 'E227', 'bitwise or shift'
673 yield (need_space[0], "%s missing whitespace "
674 "around %s operator" % (code, optype))
675 need_space = False
676 elif token_type == tokenize.OP and prev_end is not None:
677 if text == '=' and parens:
678 # Allow keyword args or defaults: foo(bar=None).
679 pass
680 elif text in WS_NEEDED_OPERATORS:
681 need_space = True
682 elif text in UNARY_OPERATORS:
683 # Check if the operator is being used as a binary operator
684 # Allow unary operators: -123, -x, +1.
685 # Allow argument unpacking: foo(*args, **kwargs).
686 if prev_type == tokenize.OP:
687 binary_usage = (prev_text in '}])')
688 elif prev_type == tokenize.NAME:
689 binary_usage = (prev_text not in KEYWORDS)
690 else:
691 binary_usage = (prev_type not in SKIP_TOKENS)
692
693 if binary_usage:
694 if text in WS_OPTIONAL_OPERATORS:
695 need_space = None
696 else:
697 need_space = True
698 elif text in WS_OPTIONAL_OPERATORS:
699 need_space = None
700
701 if need_space is None:
702 # Surrounding space is optional, but ensure that
703 # trailing space matches opening space
704 need_space = (prev_end, start != prev_end)
705 elif need_space and start == prev_end:
706 # A needed opening space was not found
707 yield prev_end, "E225 missing whitespace around operator"
708 need_space = False
709 prev_type = token_type
710 prev_text = text
711 prev_end = end
712
713
714def whitespace_around_comma(logical_line):
715 r"""
716 Avoid extraneous whitespace in the following situations:
717
    - More than one space (or a tab) after a comma, semicolon or colon,
      e.g. when trying to align values in columns.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
726 """
727 line = logical_line
728 for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
729 found = m.start() + 1
730 if '\t' in m.group():
731 yield found, "E242 tab after '%s'" % m.group()[0]
732 else:
733 yield found, "E241 multiple spaces after '%s'" % m.group()[0]
734
735
736def whitespace_around_named_parameter_equals(logical_line, tokens):
737 """
738 Don't use spaces around the '=' sign when used to indicate a
739 keyword argument or a default parameter value.
740
741 Okay: def complex(real, imag=0.0):
742 Okay: return magic(r=real, i=imag)
743 Okay: boolean(a == b)
744 Okay: boolean(a != b)
745 Okay: boolean(a <= b)
746 Okay: boolean(a >= b)
747
748 E251: def complex(real, imag = 0.0):
749 E251: return magic(r = real, i = imag)
750 """
751 parens = 0
752 no_space = False
753 prev_end = None
754 message = "E251 unexpected spaces around keyword / parameter equals"
755 for token_type, text, start, end, line in tokens:
756 if no_space:
757 no_space = False
758 if start != prev_end:
759 yield (prev_end, message)
760 elif token_type == tokenize.OP:
761 if text == '(':
762 parens += 1
763 elif text == ')':
764 parens -= 1
765 elif parens and text == '=':
766 no_space = True
767 if start != prev_end:
768 yield (prev_end, message)
769 prev_end = end
770
771
772def whitespace_before_inline_comment(logical_line, tokens):
773 """
774 Separate inline comments by at least two spaces.
775
776 An inline comment is a comment on the same line as a statement. Inline
777 comments should be separated by at least two spaces from the statement.
778 They should start with a # and a single space.
779
    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
785 """
786 prev_end = (0, 0)
787 for token_type, text, start, end, line in tokens:
788 if token_type == tokenize.COMMENT:
789 if not line[:start[1]].strip():
790 continue
791 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
792 yield (prev_end,
793 "E261 at least two spaces before inline comment")
794 symbol, sp, comment = text.partition(' ')
795 if symbol not in ('#', '#:') or comment[:1].isspace():
796 yield start, "E262 inline comment should start with '# '"
797 elif token_type != tokenize.NL:
798 prev_end = end
799
800
801def imports_on_separate_lines(logical_line):
802 r"""
803 Imports should usually be on separate lines.
804
805 Okay: import os\nimport sys
806 E401: import sys, os
807
808 Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
810 Okay: from foo.bar.yourclass import YourClass
811 Okay: import myclass
812 Okay: import foo.bar.yourclass
813 """
814 line = logical_line
815 if line.startswith('import '):
816 found = line.find(',')
817 if -1 < found and ';' not in line[:found]:
818 yield found, "E401 multiple imports on one line"
819
820
821def compound_statements(logical_line):
822 r"""
823 Compound statements (multiple statements on the same line) are
824 generally discouraged.
825
826 While sometimes it's okay to put an if/for/while with a small body
827 on the same line, never do this for multi-clause statements. Also
828 avoid folding such long lines!
829
830 Okay: if foo == 'blah':\n do_blah_thing()
831 Okay: do_one()
832 Okay: do_two()
833 Okay: do_three()
834
835 E701: if foo == 'blah': do_blah_thing()
836 E701: for x in lst: total += x
837 E701: while t < 10: t = delay()
838 E701: if foo == 'blah': do_blah_thing()
839 E701: else: do_non_blah_thing()
840 E701: try: something()
841 E701: finally: cleanup()
842 E701: if foo == 'blah': one(); two(); three()
843
844 E702: do_one(); do_two(); do_three()
845 E703: do_four(); # useless semicolon
846 """
847 line = logical_line
848 last_char = len(line) - 1
849 found = line.find(':')
850 if -1 < found < last_char:
851 before = line[:found]
852 if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
853 before.count('[') <= before.count(']') and # [1:2] (slice)
854 before.count('(') <= before.count(')') and # (Python 3 annotation)
855 not LAMBDA_REGEX.search(before)): # lambda x: x
856 yield found, "E701 multiple statements on one line (colon)"
857 found = line.find(';')
858 if -1 < found:
859 if found < last_char:
860 yield found, "E702 multiple statements on one line (semicolon)"
861 else:
862 yield found, "E703 statement ends with a semicolon"
863
864
865def explicit_line_join(logical_line, tokens):
866 r"""
867 Avoid explicit line join between brackets.
868
869 The preferred way of wrapping long lines is by using Python's implied line
870 continuation inside parentheses, brackets and braces. Long lines can be
871 broken over multiple lines by wrapping expressions in parentheses. These
872 should be used in preference to using a backslash for line continuation.
873
874 E502: aaa = [123, \\n 123]
875 E502: aaa = ("bbb " \\n "ccc")
876
877 Okay: aaa = [123,\n 123]
878 Okay: aaa = ("bbb "\n "ccc")
879 Okay: aaa = "bbb " \\n "ccc"
880 """
881 prev_start = prev_end = parens = 0
882 for token_type, text, start, end, line in tokens:
883 if start[0] != prev_start and parens and backslash:
884 yield backslash, "E502 the backslash is redundant between brackets"
885 if end[0] != prev_end:
886 if line.rstrip('\r\n').endswith('\\'):
887 backslash = (end[0], len(line.splitlines()[-1]) - 1)
888 else:
889 backslash = None
890 prev_start = prev_end = end[0]
891 else:
892 prev_start = start[0]
893 if token_type == tokenize.OP:
894 if text in '([{':
895 parens += 1
896 elif text in ')]}':
897 parens -= 1
898
899
900def comparison_to_singleton(logical_line):
901 """
902 Comparisons to singletons like None should always be done
903 with "is" or "is not", never the equality operators.
904
905 Okay: if arg is not None:
906 E711: if arg != None:
907 E712: if arg == True:
908
909 Also, beware of writing if x when you really mean if x is not None --
910 e.g. when testing whether a variable or argument that defaults to None was
911 set to some other value. The other value might have a type (such as a
912 container) that could be false in a boolean context!
913 """
914 match = COMPARE_SINGLETON_REGEX.search(logical_line)
915 if match:
916 same = (match.group(1) == '==')
917 singleton = match.group(2)
918 msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
919 if singleton in ('None',):
920 code = 'E711'
921 else:
922 code = 'E712'
923 nonzero = ((singleton == 'True' and same) or
924 (singleton == 'False' and not same))
925 msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
926 yield match.start(1), ("%s comparison to %s should be %s" %
927 (code, singleton, msg))
928
929
930def comparison_type(logical_line):
931 """
932 Object type comparisons should always use isinstance() instead of
933 comparing types directly.
934
935 Okay: if isinstance(obj, int):
936 E721: if type(obj) is type(1):
937
938 When checking if an object is a string, keep in mind that it might be a
939 unicode string too! In Python 2.3, str and unicode have a common base
940 class, basestring, so you can do:
941
942 Okay: if isinstance(obj, basestring):
943 Okay: if type(a1) is type(b1):
944 """
945 match = COMPARE_TYPE_REGEX.search(logical_line)
946 if match:
947 inst = match.group(1)
948 if inst and isidentifier(inst) and inst not in SINGLETONS:
949 return # Allow comparison for types which are not obvious
950 yield match.start(0), "E721 do not compare types, use 'isinstance()'"
951
952
953def python_3000_has_key(logical_line):
954 r"""
    The {}.has_key() method is removed in Python 3.
    Use the 'in' operator instead.
957
958 Okay: if "alph" in d:\n print d["alph"]
959 W601: assert d.has_key('alph')
960 """
961 pos = logical_line.find('.has_key(')
962 if pos > -1:
963 yield pos, "W601 .has_key() is deprecated, use 'in'"
964
965
966def python_3000_raise_comma(logical_line):
967 """
968 When raising an exception, use "raise ValueError('message')"
969 instead of the older form "raise ValueError, 'message'".
970
971 The paren-using form is preferred because when the exception arguments
972 are long or include string formatting, you don't need to use line
973 continuation characters thanks to the containing parentheses. The older
974 form is removed in Python 3.
975
976 Okay: raise DummyError("Message")
977 W602: raise DummyError, "Message"
978 """
979 match = RAISE_COMMA_REGEX.match(logical_line)
980 if match and not RERAISE_COMMA_REGEX.match(logical_line):
981 yield match.start(1), "W602 deprecated form of raising exception"
982
983
984def python_3000_not_equal(logical_line):
985 """
986 != can also be written <>, but this is an obsolete usage kept for
987 backwards compatibility only. New code should always use !=.
988 The older syntax is removed in Python 3.
989
990 Okay: if a != 'no':
991 W603: if a <> 'no':
992 """
993 pos = logical_line.find('<>')
994 if pos > -1:
995 yield pos, "W603 '<>' is deprecated, use '!='"
996
997
998def python_3000_backticks(logical_line):
999 """
1000 Backticks are removed in Python 3.
1001 Use repr() instead.
1002
1003 Okay: val = repr(1 + 2)
1004 W604: val = `1 + 2`
1005 """
1006 pos = logical_line.find('`')
1007 if pos > -1:
1008 yield pos, "W604 backticks are deprecated, use 'repr()'"
1009
1010
1011##############################################################################
1012# Helper functions
1013##############################################################################
1014
1015
1016if '' == ''.encode():
1017 # Python 2: implicit encoding.
1018 def readlines(filename):
1019 f = open(filename)
1020 try:
1021 return f.readlines()
1022 finally:
1023 f.close()
1024
1025 isidentifier = re.compile(r'[a-zA-Z_]\w*').match
1026 stdin_get_value = sys.stdin.read
1027else:
1028 # Python 3
1029 def readlines(filename):
1030 f = open(filename, 'rb')
1031 try:
1032 coding, lines = tokenize.detect_encoding(f.readline)
1033 f = TextIOWrapper(f, coding, line_buffering=True)
1034 return [l.decode(coding) for l in lines] + f.readlines()
1035 except (LookupError, SyntaxError, UnicodeError):
1036 f.close()
1037 # Fall back if files are improperly declared
1038 f = open(filename, encoding='latin-1')
1039 return f.readlines()
1040 finally:
1041 f.close()
1042
1043 isidentifier = str.isidentifier
1044
1045 def stdin_get_value():
1046 return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1047readlines.__doc__ = " Read the source code."
1048noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1049
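# A physical or logical line that carries a "# noqa" (or legacy "# nopep8")
# comment is skipped by the checks that call this search, for instance
# maximum_line_length above. An illustrative example:
#
#     url = 'http://example.com/a/very/long/path?q=1'  # noqa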
1050
1051def expand_indent(line):
1052 r"""
1053 Return the amount of indentation.
1054 Tabs are expanded to the next multiple of 8.
1055
1056 >>> expand_indent(' ')
1057 4
1058 >>> expand_indent('\t')
1059 8
1060 >>> expand_indent(' \t')
1061 8
1062 >>> expand_indent(' \t')
1063 8
1064 >>> expand_indent(' \t')
1065 16
1066 """
1067 if '\t' not in line:
1068 return len(line) - len(line.lstrip())
1069 result = 0
1070 for char in line:
1071 if char == '\t':
1072 result = result // 8 * 8 + 8
1073 elif char == ' ':
1074 result += 1
1075 else:
1076 break
1077 return result
1078
1079
1080def mute_string(text):
1081 """
1082 Replace contents with 'xxx' to prevent syntax matching.
1083
1084 >>> mute_string('"abc"')
1085 '"xxx"'
1086 >>> mute_string("'''abc'''")
1087 "'''xxx'''"
1088 >>> mute_string("r'abc'")
1089 "r'xxx'"
1090 """
1091 # String modifiers (e.g. u or r)
1092 start = text.index(text[-1]) + 1
1093 end = len(text) - 1
1094 # Triple quotes
1095 if text[-3:] in ('"""', "'''"):
1096 start += 2
1097 end -= 2
1098 return text[:start] + 'x' * (end - start) + text[end:]
1099
1100
1101def parse_udiff(diff, patterns=None, parent='.'):
1102 """Return a dictionary of matching lines."""
1103 # For each file of the diff, the entry key is the filename,
1104 # and the value is a set of row numbers to consider.
1105 rv = {}
1106 path = nrows = None
1107 for line in diff.splitlines():
1108 if nrows:
1109 if line[:1] != '-':
1110 nrows -= 1
1111 continue
1112 if line[:3] == '@@ ':
1113 hunk_match = HUNK_REGEX.match(line)
1114 row, nrows = [int(g or '1') for g in hunk_match.groups()]
1115 rv[path].update(range(row, row + nrows))
1116 elif line[:3] == '+++':
1117 path = line[4:].split('\t', 1)[0]
1118 if path[:2] == 'b/':
1119 path = path[2:]
1120 rv[path] = set()
1121 return dict([(os.path.join(parent, path), rows)
1122 for (path, rows) in rv.items()
1123 if rows and filename_match(path, patterns)])
1124
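# A small, self-checking illustration of parse_udiff (the diff text is
# hypothetical):
#
#     diff = '\n'.join(['--- a/example.py',
#                       '+++ b/example.py',
#                       '@@ -10,3 +12,4 @@'])
#     rows = parse_udiff(diff, patterns=['*.py'])
#     assert rows == {'./example.py': set([12, 13, 14, 15])}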
1125
1126def filename_match(filename, patterns, default=True):
1127 """
1128 Check if patterns contains a pattern that matches filename.
1129 If patterns is unspecified, this always returns True.
1130 """
1131 if not patterns:
1132 return default
1133 return any(fnmatch(filename, pattern) for pattern in patterns)
1134
1135
1136##############################################################################
1137# Framework to run all checks
1138##############################################################################
1139
1140
1141_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1142
1143
1144def register_check(check, codes=None):
1145 """
1146 Register a new check object.
1147 """
1148 def _add_check(check, kind, codes, args):
1149 if check in _checks[kind]:
1150 _checks[kind][check][0].extend(codes or [])
1151 else:
1152 _checks[kind][check] = (codes or [''], args)
1153 if inspect.isfunction(check):
1154 args = inspect.getargspec(check)[0]
1155 if args and args[0] in ('physical_line', 'logical_line'):
1156 if codes is None:
1157 codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
1158 _add_check(check, args[0], codes, args)
1159 elif inspect.isclass(check):
1160 if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:
1161 _add_check(check, 'tree', codes, None)
1162
1163
1164def init_checks_registry():
1165 """
1166 Register all globally visible functions where the first argument name
1167 is 'physical_line' or 'logical_line'.
1168 """
1169 mod = inspect.getmodule(register_check)
1170 for (name, function) in inspect.getmembers(mod, inspect.isfunction):
1171 register_check(function)
1172init_checks_registry()
1173
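# Checks defined in this module are registered automatically by
# init_checks_registry above. A sketch of registering an external check by
# hand (the function, its W999 code and its behaviour are hypothetical):
#
#     def check_no_eval(logical_line):
#         """W999 hypothetical check discouraging eval()."""
#         pos = logical_line.find('eval(')
#         if pos > -1:
#             yield pos, "W999 eval() is discouraged"
#
#     register_check(check_no_eval, codes=['W999'])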
1174
1175class Checker(object):
1176 """
1177 Load a Python source file, tokenize it, check coding style.
1178 """
1179
1180 def __init__(self, filename=None, lines=None,
1181 options=None, report=None, **kwargs):
1182 if options is None:
1183 options = StyleGuide(kwargs).options
1184 else:
1185 assert not kwargs
1186 self._io_error = None
1187 self._physical_checks = options.physical_checks
1188 self._logical_checks = options.logical_checks
1189 self._ast_checks = options.ast_checks
1190 self.max_line_length = options.max_line_length
1191 self.verbose = options.verbose
1192 self.filename = filename
1193 if filename is None:
1194 self.filename = 'stdin'
1195 self.lines = lines or []
1196 elif filename == '-':
1197 self.filename = 'stdin'
1198 self.lines = stdin_get_value().splitlines(True)
1199 elif lines is None:
1200 try:
1201 self.lines = readlines(filename)
1202 except IOError:
1203 exc_type, exc = sys.exc_info()[:2]
1204 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1205 self.lines = []
1206 else:
1207 self.lines = lines
1208 self.report = report or options.report
1209 self.report_error = self.report.error
1210
1211 def report_invalid_syntax(self):
1212 exc_type, exc = sys.exc_info()[:2]
1213 offset = exc.args[1]
1214 if len(offset) > 2:
1215 offset = offset[1:3]
1216 self.report_error(offset[0], offset[1] or 0,
1217 'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
1218 self.report_invalid_syntax)
1219 report_invalid_syntax.__doc__ = " Check if the syntax is valid."
1220
1221 def readline(self):
1222 """
1223 Get the next line from the input buffer.
1224 """
1225 self.line_number += 1
1226 if self.line_number > len(self.lines):
1227 return ''
1228 return self.lines[self.line_number - 1]
1229
1230 def readline_check_physical(self):
1231 """
1232 Check and return the next physical line. This method can be
1233 used to feed tokenize.generate_tokens.
1234 """
1235 line = self.readline()
1236 if line:
1237 self.check_physical(line)
1238 return line
1239
1240 def run_check(self, check, argument_names):
1241 """
1242 Run a check plugin.
1243 """
1244 arguments = []
1245 for name in argument_names:
1246 arguments.append(getattr(self, name))
1247 return check(*arguments)
1248
1249 def check_physical(self, line):
1250 """
1251 Run all physical checks on a raw input line.
1252 """
1253 self.physical_line = line
1254 if self.indent_char is None and line[:1] in WHITESPACE:
1255 self.indent_char = line[0]
1256 for name, check, argument_names in self._physical_checks:
1257 result = self.run_check(check, argument_names)
1258 if result is not None:
1259 offset, text = result
1260 self.report_error(self.line_number, offset, text, check)
1261
1262 def build_tokens_line(self):
1263 """
1264 Build a logical line from tokens.
1265 """
1266 self.mapping = []
1267 logical = []
1268 length = 0
1269 previous = None
1270 for token in self.tokens:
1271 token_type, text = token[0:2]
1272 if token_type in SKIP_TOKENS:
1273 continue
1274 if token_type == tokenize.STRING:
1275 text = mute_string(text)
1276 if previous:
1277 end_row, end = previous[3]
1278 start_row, start = token[2]
1279 if end_row != start_row: # different row
1280 prev_text = self.lines[end_row - 1][end - 1]
1281 if prev_text == ',' or (prev_text not in '{[('
1282 and text not in '}])'):
1283 logical.append(' ')
1284 length += 1
1285 elif end != start: # different column
1286 fill = self.lines[end_row - 1][end:start]
1287 logical.append(fill)
1288 length += len(fill)
1289 self.mapping.append((length, token))
1290 logical.append(text)
1291 length += len(text)
1292 previous = token
1293 self.logical_line = ''.join(logical)
1294 # With Python 2, if the line ends with '\r\r\n' the assertion fails
1295 # assert self.logical_line.strip() == self.logical_line
1296
1297 def check_logical(self):
1298 """
1299 Build a line from tokens and run all logical checks on it.
1300 """
1301 self.build_tokens_line()
1302 self.report.increment_logical_line()
1303 first_line = self.lines[self.mapping[0][1][2][0] - 1]
1304 indent = first_line[:self.mapping[0][1][2][1]]
1305 self.previous_indent_level = self.indent_level
1306 self.indent_level = expand_indent(indent)
1307 if self.verbose >= 2:
1308 print(self.logical_line[:80].rstrip())
1309 for name, check, argument_names in self._logical_checks:
1310 if self.verbose >= 4:
1311 print(' ' + name)
1312 for result in self.run_check(check, argument_names):
1313 offset, text = result
1314 if isinstance(offset, tuple):
1315 orig_number, orig_offset = offset
1316 else:
1317 for token_offset, token in self.mapping:
1318 if offset >= token_offset:
1319 orig_number = token[2][0]
1320 orig_offset = (token[2][1] + offset - token_offset)
1321 self.report_error(orig_number, orig_offset, text, check)
1322 self.previous_logical = self.logical_line
1323
1324 def check_ast(self):
1325 try:
1326 tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
1327 except SyntaxError:
1328 return self.report_invalid_syntax()
1329 for name, cls, _ in self._ast_checks:
1330 checker = cls(tree, self.filename)
1331 for lineno, offset, text, check in checker.run():
1332 if not noqa(self.lines[lineno - 1]):
1333 self.report_error(lineno, offset, text, check)
1334
1335 def generate_tokens(self):
1336 if self._io_error:
1337 self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
1338 tokengen = tokenize.generate_tokens(self.readline_check_physical)
1339 try:
1340 for token in tokengen:
1341 yield token
1342 except (SyntaxError, tokenize.TokenError):
1343 self.report_invalid_syntax()
1344
1345 def check_all(self, expected=None, line_offset=0):
1346 """
1347 Run all checks on the input file.
1348 """
1349 self.report.init_file(self.filename, self.lines, expected, line_offset)
1350 if self._ast_checks:
1351 self.check_ast()
1352 self.line_number = 0
1353 self.indent_char = None
1354 self.indent_level = 0
1355 self.previous_logical = ''
1356 self.tokens = []
1357 self.blank_lines = blank_lines_before_comment = 0
1358 parens = 0
1359 for token in self.generate_tokens():
1360 self.tokens.append(token)
1361 token_type, text = token[0:2]
1362 if self.verbose >= 3:
1363 if token[2][0] == token[3][0]:
1364 pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
1365 else:
1366 pos = 'l.%s' % token[3][0]
1367 print('l.%s\t%s\t%s\t%r' %
1368 (token[2][0], pos, tokenize.tok_name[token[0]], text))
1369 if token_type == tokenize.OP:
1370 if text in '([{':
1371 parens += 1
1372 elif text in '}])':
1373 parens -= 1
1374 elif not parens:
1375 if token_type == tokenize.NEWLINE:
1376 if self.blank_lines < blank_lines_before_comment:
1377 self.blank_lines = blank_lines_before_comment
1378 self.check_logical()
1379 self.tokens = []
1380 self.blank_lines = blank_lines_before_comment = 0
1381 elif token_type == tokenize.NL:
1382 if len(self.tokens) == 1:
1383 # The physical line contains only this token.
1384 self.blank_lines += 1
1385 self.tokens = []
1386 elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
1387 if blank_lines_before_comment < self.blank_lines:
1388 blank_lines_before_comment = self.blank_lines
1389 self.blank_lines = 0
1390 if COMMENT_WITH_NL:
1391 # The comment also ends a physical line
1392 self.tokens = []
1393 return self.report.get_file_results()
1394
1395
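# A minimal sketch of driving the Checker class directly (the file name is
# illustrative; keyword options such as show_source are forwarded to a
# StyleGuide when no options object is passed in):
#
#     checker = Checker('example.py', show_source=True)
#     error_count = checker.check_all()
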
1396class BaseReport(object):
1397 """Collect the results of the checks."""
1398 print_filename = False
1399
1400 def __init__(self, options):
1401 self._benchmark_keys = options.benchmark_keys
1402 self._ignore_code = options.ignore_code
1403 # Results
1404 self.elapsed = 0
1405 self.total_errors = 0
1406 self.counters = dict.fromkeys(self._benchmark_keys, 0)
1407 self.messages = {}
1408
1409 def start(self):
1410 """Start the timer."""
1411 self._start_time = time.time()
1412
1413 def stop(self):
1414 """Stop the timer."""
1415 self.elapsed = time.time() - self._start_time
1416
1417 def init_file(self, filename, lines, expected, line_offset):
1418 """Signal a new file."""
1419 self.filename = filename
1420 self.lines = lines
1421 self.expected = expected or ()
1422 self.line_offset = line_offset
1423 self.file_errors = 0
1424 self.counters['files'] += 1
1425 self.counters['physical lines'] += len(lines)
1426
1427 def increment_logical_line(self):
1428 """Signal a new logical line."""
1429 self.counters['logical lines'] += 1
1430
1431 def error(self, line_number, offset, text, check):
1432 """Report an error, according to options."""
1433 code = text[:4]
1434 if self._ignore_code(code):
1435 return
1436 if code in self.counters:
1437 self.counters[code] += 1
1438 else:
1439 self.counters[code] = 1
1440 self.messages[code] = text[5:]
1441 # Don't care about expected errors or warnings
1442 if code in self.expected:
1443 return
1444 if self.print_filename and not self.file_errors:
1445 print(self.filename)
1446 self.file_errors += 1
1447 self.total_errors += 1
1448 return code
1449
1450 def get_file_results(self):
1451 """Return the count of errors and warnings for this file."""
1452 return self.file_errors
1453
1454 def get_count(self, prefix=''):
1455 """Return the total count of errors and warnings."""
1456 return sum([self.counters[key]
1457 for key in self.messages if key.startswith(prefix)])
1458
1459 def get_statistics(self, prefix=''):
1460 """
1461 Get statistics for message codes that start with the prefix.
1462
1463 prefix='' matches all errors and warnings
1464 prefix='E' matches all errors
1465 prefix='W' matches all warnings
1466 prefix='E4' matches all errors that have to do with imports
1467 """
1468 return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])
1469 for key in sorted(self.messages) if key.startswith(prefix)]
1470
1471 def print_statistics(self, prefix=''):
1472 """Print overall statistics (number of errors and warnings)."""
1473 for line in self.get_statistics(prefix):
1474 print(line)
1475
1476 def print_benchmark(self):
1477 """Print benchmark numbers."""
1478 print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
1479 if self.elapsed:
1480 for key in self._benchmark_keys:
1481 print('%-7d %s per second (%d total)' %
1482 (self.counters[key] / self.elapsed, key,
1483 self.counters[key]))
1484
1485
1486class FileReport(BaseReport):
1487 """Collect the results of the checks and print only the filenames."""
1488 print_filename = True
1489
1490
1491class StandardReport(BaseReport):
1492 """Collect and print the results of the checks."""
1493
1494 def __init__(self, options):
1495 super(StandardReport, self).__init__(options)
1496 self._fmt = REPORT_FORMAT.get(options.format.lower(),
1497 options.format)
1498 self._repeat = options.repeat
1499 self._show_source = options.show_source
1500 self._show_pep8 = options.show_pep8
1501
1502 def init_file(self, filename, lines, expected, line_offset):
1503 """Signal a new file."""
1504 self._deferred_print = []
1505 return super(StandardReport, self).init_file(
1506 filename, lines, expected, line_offset)
1507
1508 def error(self, line_number, offset, text, check):
1509 """Report an error, according to options."""
1510 code = super(StandardReport, self).error(line_number, offset,
1511 text, check)
1512 if code and (self.counters[code] == 1 or self._repeat):
1513 self._deferred_print.append(
1514 (line_number, offset, code, text[5:], check.__doc__))
1515 return code
1516
1517 def get_file_results(self):
1518 """Print the result and return the overall count for this file."""
1519 self._deferred_print.sort()
1520 for line_number, offset, code, text, doc in self._deferred_print:
1521 print(self._fmt % {
1522 'path': self.filename,
1523 'row': self.line_offset + line_number, 'col': offset + 1,
1524 'code': code, 'text': text,
1525 })
1526 if self._show_source:
1527 if line_number > len(self.lines):
1528 line = ''
1529 else:
1530 line = self.lines[line_number - 1]
1531 print(line.rstrip())
1532 print(' ' * offset + '^')
1533 if self._show_pep8 and doc:
1534 print(doc.lstrip('\n').rstrip())
1535 return self.file_errors
1536
1537
1538class DiffReport(StandardReport):
1539 """Collect and print the results for the changed lines only."""
1540
1541 def __init__(self, options):
1542 super(DiffReport, self).__init__(options)
1543 self._selected = options.selected_lines
1544
1545 def error(self, line_number, offset, text, check):
1546 if line_number not in self._selected[self.filename]:
1547 return
1548 return super(DiffReport, self).error(line_number, offset, text, check)
1549
1550
1551class StyleGuide(object):
1552 """Initialize a PEP-8 instance with few options."""
1553
1554 def __init__(self, *args, **kwargs):
1555 # build options from the command line
1556 self.checker_class = kwargs.pop('checker_class', Checker)
1557 parse_argv = kwargs.pop('parse_argv', False)
1558 config_file = kwargs.pop('config_file', None)
1559 parser = kwargs.pop('parser', None)
1560 options, self.paths = process_options(
1561 parse_argv=parse_argv, config_file=config_file, parser=parser)
1562 if args or kwargs:
1563 # build options from dict
1564 options_dict = dict(*args, **kwargs)
1565 options.__dict__.update(options_dict)
1566 if 'paths' in options_dict:
1567 self.paths = options_dict['paths']
1568
1569 self.runner = self.input_file
1570 self.options = options
1571
1572 if not options.reporter:
1573 options.reporter = BaseReport if options.quiet else StandardReport
1574
1575 for index, value in enumerate(options.exclude):
1576 options.exclude[index] = value.rstrip('/')
1577 # Ignore all checks which are not explicitly selected
1578 options.select = tuple(options.select or ())
1579 options.ignore = tuple(options.ignore or options.select and ('',))
1580 options.benchmark_keys = BENCHMARK_KEYS[:]
1581 options.ignore_code = self.ignore_code
1582 options.physical_checks = self.get_checks('physical_line')
1583 options.logical_checks = self.get_checks('logical_line')
1584 options.ast_checks = self.get_checks('tree')
1585 self.init_report()
1586
1587 def init_report(self, reporter=None):
1588 """Initialize the report instance."""
1589 self.options.report = (reporter or self.options.reporter)(self.options)
1590 return self.options.report
1591
1592 def check_files(self, paths=None):
1593 """Run all checks on the paths."""
1594 if paths is None:
1595 paths = self.paths
1596 report = self.options.report
1597 runner = self.runner
1598 report.start()
1599 try:
1600 for path in paths:
1601 if os.path.isdir(path):
1602 self.input_dir(path)
1603 elif not self.excluded(path):
1604 runner(path)
1605 except KeyboardInterrupt:
1606 print('... stopped')
1607 report.stop()
1608 return report
1609
1610 def input_file(self, filename, lines=None, expected=None, line_offset=0):
1611 """Run all checks on a Python source file."""
1612 if self.options.verbose:
1613 print('checking %s' % filename)
1614 fchecker = self.checker_class(
1615 filename, lines=lines, options=self.options)
1616 return fchecker.check_all(expected=expected, line_offset=line_offset)
1617
1618 def input_dir(self, dirname):
1619 """Check all files in this directory and all subdirectories."""
1620 dirname = dirname.rstrip('/')
1621 if self.excluded(dirname):
1622 return 0
1623 counters = self.options.report.counters
1624 verbose = self.options.verbose
1625 filepatterns = self.options.filename
1626 runner = self.runner
1627 for root, dirs, files in os.walk(dirname):
1628 if verbose:
1629 print('directory ' + root)
1630 counters['directories'] += 1
1631 for subdir in sorted(dirs):
1632 if self.excluded(os.path.join(root, subdir)):
1633 dirs.remove(subdir)
1634 for filename in sorted(files):
1635 # contain a pattern that matches?
1636 if ((filename_match(filename, filepatterns) and
1637 not self.excluded(filename))):
1638 runner(os.path.join(root, filename))
1639
1640 def excluded(self, filename):
1641 """
1642 Check if options.exclude contains a pattern that matches filename.
1643 """
1644 basename = os.path.basename(filename)
1645 return any((filename_match(filename, self.options.exclude,
1646 default=False),
1647 filename_match(basename, self.options.exclude,
1648 default=False)))
1649
1650 def ignore_code(self, code):
1651 """
1652 Check if the error code should be ignored.
1653
1654 If 'options.select' contains a prefix of the error code,
1655 return False. Else, if 'options.ignore' contains a prefix of
1656 the error code, return True.
1657 """
1658 return (code.startswith(self.options.ignore) and
1659 not code.startswith(self.options.select))
1660
1661 def get_checks(self, argument_name):
1662 """
1663 Find all globally visible functions where the first argument name
1664 starts with argument_name and which contain selected tests.
1665 """
1666 checks = []
1667 for check, attrs in _checks[argument_name].items():
1668 (codes, args) = attrs
1669 if any(not (code and self.ignore_code(code)) for code in codes):
1670 checks.append((check.__name__, check, args))
1671 return sorted(checks)
1672
1673
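# A minimal library-usage sketch of the StyleGuide class (the module is
# assumed to be importable as ``pep8``; paths and option values are
# illustrative):
#
#     import pep8
#     style = pep8.StyleGuide(quiet=True, max_line_length=99)
#     report = style.check_files(['src/'])
#     if report.total_errors:
#         print('\n'.join(report.get_statistics('E')))
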
1674def get_parser(prog='pep8', version=__version__):
1675 parser = OptionParser(prog=prog, version=version,
1676 usage="%prog [options] input ...")
1677 parser.config_options = [
1678 'exclude', 'filename', 'select', 'ignore', 'max-line-length', 'count',
1679 'format', 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']
1680 parser.add_option('-v', '--verbose', default=0, action='count',
1681 help="print status messages, or debug with -vv")
1682 parser.add_option('-q', '--quiet', default=0, action='count',
1683 help="report only file names, or nothing with -qq")
1684 parser.add_option('-r', '--repeat', default=True, action='store_true',
1685 help="(obsolete) show all occurrences of the same error")
1686 parser.add_option('--first', action='store_false', dest='repeat',
1687 help="show first occurrence of each error")
1688 parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
1689 help="exclude files or directories which match these "
1690 "comma separated patterns (default: %default)")
1691 parser.add_option('--filename', metavar='patterns', default='*.py',
1692 help="when parsing directories, only check filenames "
1693 "matching these comma separated patterns "
1694 "(default: %default)")
1695 parser.add_option('--select', metavar='errors', default='',
1696 help="select errors and warnings (e.g. E,W6)")
1697 parser.add_option('--ignore', metavar='errors', default='',
1698 help="skip errors and warnings (e.g. E4,W)")
1699 parser.add_option('--show-source', action='store_true',
1700 help="show source code for each error")
1701 parser.add_option('--show-pep8', action='store_true',
1702 help="show text of PEP 8 for each error "
1703 "(implies --first)")
1704 parser.add_option('--statistics', action='store_true',
1705 help="count errors and warnings")
1706 parser.add_option('--count', action='store_true',
1707 help="print total number of errors and warnings "
1708 "to standard error and set exit code to 1 if "
1709 "total is not null")
1710 parser.add_option('--max-line-length', type='int', metavar='n',
1711 default=MAX_LINE_LENGTH,
1712 help="set maximum allowed line length "
1713 "(default: %default)")
1714 parser.add_option('--format', metavar='format', default='default',
1715 help="set the error format [default|pylint|<custom>]")
1716 parser.add_option('--diff', action='store_true',
1717 help="report only lines changed according to the "
1718 "unified diff received on STDIN")
1719 group = parser.add_option_group("Testing Options")
1720 if os.path.exists(TESTSUITE_PATH):
1721 group.add_option('--testsuite', metavar='dir',
1722 help="run regression tests from dir")
1723 group.add_option('--doctest', action='store_true',
1724 help="run doctest on myself")
1725 group.add_option('--benchmark', action='store_true',
1726 help="measure processing speed")
1727 return parser
1728
1729
1730def read_config(options, args, arglist, parser):
1731 """Read both user configuration and local configuration."""
1732 config = RawConfigParser()
1733
1734 user_conf = options.config
1735 if user_conf and os.path.isfile(user_conf):
1736 if options.verbose:
1737 print('user configuration: %s' % user_conf)
1738 config.read(user_conf)
1739
1740 parent = tail = args and os.path.abspath(os.path.commonprefix(args))
1741 while tail:
1742 for name in PROJECT_CONFIG:
1743 local_conf = os.path.join(parent, name)
1744 if os.path.isfile(local_conf):
1745 break
1746 else:
1747 parent, tail = os.path.split(parent)
1748 continue
1749 if options.verbose:
1750 print('local configuration: %s' % local_conf)
1751 config.read(local_conf)
1752 break
1753
1754 pep8_section = parser.prog
1755 if config.has_section(pep8_section):
1756 option_list = dict([(o.dest, o.type or o.action)
1757 for o in parser.option_list])
1758
1759 # First, read the default values
1760 new_options, _ = parser.parse_args([])
1761
1762 # Second, parse the configuration
1763 for opt in config.options(pep8_section):
1764 if options.verbose > 1:
1765 print(" %s = %s" % (opt, config.get(pep8_section, opt)))
1766 if opt.replace('_', '-') not in parser.config_options:
1767 print("Unknown option: '%s'\n not in [%s]" %
1768 (opt, ' '.join(parser.config_options)))
1769 sys.exit(1)
1770 normalized_opt = opt.replace('-', '_')
1771 opt_type = option_list[normalized_opt]
1772 if opt_type in ('int', 'count'):
1773 value = config.getint(pep8_section, opt)
1774 elif opt_type == 'string':
1775 value = config.get(pep8_section, opt)
1776 else:
1777 assert opt_type in ('store_true', 'store_false')
1778 value = config.getboolean(pep8_section, opt)
1779 setattr(new_options, normalized_opt, value)
1780
1781 # Third, overwrite with the command-line options
1782 options, _ = parser.parse_args(arglist, values=new_options)
1783 options.doctest = options.testsuite = False
1784 return options
1785
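# A minimal sketch of a project configuration read by read_config() above,
# taken from the [pep8] section of tox.ini, setup.cfg or a .pep8 file in a
# parent folder of the checked paths (option names mirror the long command
# line flags; the values below are illustrative):
#
#     [pep8]
#     ignore = E226,E302
#     max-line-length = 100
#     statistics = True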
1786
1787def process_options(arglist=None, parse_argv=False, config_file=None,
1788 parser=None):
1789 """Process options passed either via arglist or via command line args."""
1790 if not arglist and not parse_argv:
1791 # Don't read the command line if the module is used as a library.
1792 arglist = []
1793 if not parser:
1794 parser = get_parser()
1795 if not parser.has_option('--config'):
1796 if config_file is True:
1797 config_file = DEFAULT_CONFIG
1798 group = parser.add_option_group("Configuration", description=(
1799 "The project options are read from the [%s] section of the "
1800 "tox.ini file or the setup.cfg file located in any parent folder "
1801 "of the path(s) being processed. Allowed options are: %s." %
1802 (parser.prog, ', '.join(parser.config_options))))
1803 group.add_option('--config', metavar='path', default=config_file,
1804 help="user config file location (default: %default)")
1805 options, args = parser.parse_args(arglist)
1806 options.reporter = None
1807
1808 if options.ensure_value('testsuite', False):
1809 args.append(options.testsuite)
1810 elif not options.ensure_value('doctest', False):
1811 if parse_argv and not args:
1812 if options.diff or any(os.path.exists(name)
1813 for name in PROJECT_CONFIG):
1814 args = ['.']
1815 else:
1816 parser.error('input not specified')
1817 options = read_config(options, args, arglist, parser)
1818 options.reporter = parse_argv and options.quiet == 1 and FileReport
1819
1820 if options.filename:
1821 options.filename = options.filename.split(',')
1822 options.exclude = options.exclude.split(',')
1823 if options.select:
1824 options.select = options.select.split(',')
1825 if options.ignore:
1826 options.ignore = options.ignore.split(',')
1827 elif not (options.select or
1828 options.testsuite or options.doctest) and DEFAULT_IGNORE:
1829 # The default choice: ignore controversial checks
1830 # (for doctest and testsuite, all checks are required)
1831 options.ignore = DEFAULT_IGNORE.split(',')
1832
1833 if options.diff:
1834 options.reporter = DiffReport
1835 stdin = stdin_get_value()
1836 options.selected_lines = parse_udiff(stdin, options.filename, args[0])
1837 args = sorted(options.selected_lines)
1838
1839 return options, args
1840
1841
1842def _main():
1843 """Parse options and run checks on Python source."""
1844 pep8style = StyleGuide(parse_argv=True, config_file=True)
1845 options = pep8style.options
1846 if options.doctest or options.testsuite:
1847 sys.path[:0] = [TESTSUITE_PATH]
1848 from test_pep8 import run_tests
1849 del sys.path[0]
1850 report = run_tests(pep8style, options.doctest, options.testsuite)
1851 else:
1852 report = pep8style.check_files()
1853 if options.statistics:
1854 report.print_statistics()
1855 if options.benchmark:
1856 report.print_benchmark()
1857 if options.testsuite and not options.quiet:
1858 report.print_results()
1859 if report.total_errors:
1860 if options.count:
1861 sys.stderr.write(str(report.total_errors) + '\n')
1862 sys.exit(1)
1863
1864if __name__ == '__main__':
1865 _main()