| Tor Norbye | 3a2425a | 2013-11-04 10:16:08 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # pep8.py - Check Python source code formatting, according to PEP 8 |
| 3 | # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net> |
| 4 | # Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com> |
| 5 | # |
| 6 | # Permission is hereby granted, free of charge, to any person |
| 7 | # obtaining a copy of this software and associated documentation files |
| 8 | # (the "Software"), to deal in the Software without restriction, |
| 9 | # including without limitation the rights to use, copy, modify, merge, |
| 10 | # publish, distribute, sublicense, and/or sell copies of the Software, |
| 11 | # and to permit persons to whom the Software is furnished to do so, |
| 12 | # subject to the following conditions: |
| 13 | # |
| 14 | # The above copyright notice and this permission notice shall be |
| 15 | # included in all copies or substantial portions of the Software. |
| 16 | # |
| 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 20 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 21 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 22 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 23 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 24 | # SOFTWARE. |
| 25 | |
| 26 | r""" |
| 27 | Check Python source code formatting, according to PEP 8: |
| 28 | http://www.python.org/dev/peps/pep-0008/ |
| 29 | |
| 30 | For usage and a list of options, try this: |
| 31 | $ python pep8.py -h |
| 32 | |
| 33 | This program and its regression test suite live here: |
| 34 | http://github.com/jcrocholl/pep8 |
| 35 | |
| 36 | Groups of errors and warnings: |
| 37 | E errors |
| 38 | W warnings |
| 39 | 100 indentation |
| 40 | 200 whitespace |
| 41 | 300 blank lines |
| 42 | 400 imports |
| 43 | 500 line length |
| 44 | 600 deprecation |
| 45 | 700 statements |
| 46 | 900 syntax error |
| 47 | """ |
__version__ = '1.4.5a0'

import os
import sys
import re
import time
import inspect
import keyword
import tokenize
from optparse import OptionParser
from fnmatch import fnmatch
try:
    # Python 3 module names.
    from configparser import RawConfigParser
    from io import TextIOWrapper
except ImportError:
    # Python 2 fallback (TextIOWrapper is only needed on Python 3).
    from ConfigParser import RawConfigParser

# Comma-separated patterns of directory names skipped while walking trees.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
# Checks switched off unless explicitly selected.
# NOTE(review): 'E24' looks like a code prefix covering E241/E242 --
# confirm against the code-matching logic elsewhere in this file.
DEFAULT_IGNORE = 'E226,E24'
# Per-user configuration file location.
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
# Candidate per-project configuration files.
PROJECT_CONFIG = ('.pep8', 'tox.ini', 'setup.cfg')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Output templates for the two supported report styles.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# Flag for compile() to produce an AST without executing the code
# (hard-coded value so the ast module is not required).
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# 'print' is included so it is treated as a keyword on Python 3 as well.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
# Operators that may appear in unary position
# ('>>' covers the Python 2 "print >>sys.stderr" form).
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
# Operators around which whitespace is optional (E226/E227/E228) ...
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# ... and operators that always require surrounding whitespace (E225).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
                         tokenize.INDENT, tokenize.DEDENT])
# Counters reported by the benchmarking option.
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
# Punctuation followed by more than one space, or by a tab.
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
| 112 | |
| 113 | |
| 114 | ############################################################################## |
| 115 | # Plugins (check functions) for physical lines |
| 116 | ############################################################################## |
| 117 | |
| 118 | |
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indentation character that differs from the file's
    # dominant indent character.
    mismatch = next((offset for offset, char in enumerate(indent)
                     if char != indent_char), None)
    if mismatch is not None:
        return mismatch, "E101 indentation contains mixed spaces and tabs"
| 137 | |
| 138 | |
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    tab_offset = indent.find('\t')
    if tab_offset != -1:
        return tab_offset, "W191 indentation contains tabs"
| 150 | |
| 151 | |
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.
    FBM: Except when it occurs as part of a blank line (i.e. the line is
    nothing but whitespace).  According to Python docs[1] a line with only
    whitespace is considered a blank line, and is to be ignored.  However,
    matching a blank line to its indentation level avoids mistakenly
    terminating a multi-line statement (e.g. class declaration) when
    pasting code into the standard Python interpreter.

    [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines

    The warning returned varies on whether the line itself is blank, for
    easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Drop the line terminator (and a stray form feed) before looking at
    # what trails the actual content.
    line = physical_line.rstrip('\n')    # chr(10), newline
    line = line.rstrip('\r')             # chr(13), carriage return
    line = line.rstrip('\x0c')           # chr(12), form feed, ^L
    stripped = line.rstrip(' \t\v')
    if stripped == line:
        return None
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    # The whole line was whitespace.
    return 0, "W293 blank line contains whitespace"
| 180 | |
| 181 | |
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    # Only the very last physical line of the file can trigger W391.
    is_last_line = (line_number == len(lines))
    if is_last_line and not physical_line.rstrip():
        return 0, "W391 blank line at end of file"
| 191 | |
| 192 | |
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.

    Reports warning W292.
    """
    stripped = physical_line.rstrip()
    if stripped == physical_line:
        # rstrip() removed nothing, so the line has no terminating '\n'.
        return len(physical_line), "W292 no newline at end of file"
| 201 | |
| 202 | |
def maximum_line_length(physical_line, max_line_length):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length <= max_line_length:
        return None
    if noqa(line):
        # Lines marked "# noqa" are exempt.
        return None
    if hasattr(line, 'decode'):  # Python 2 byte string
        # The line could contain multi-byte characters; count characters,
        # not bytes.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
| 230 | |
| 231 | |
| 232 | ############################################################################## |
| 233 | # Plugins (check functions) for logical lines |
| 234 | ############################################################################## |
| 235 | |
| 236 | |
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # no blank-line rules apply before the first logical line
    if previous_logical.startswith('@'):
        # A decorator must be glued to the definition that follows it.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
        return
    if blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
        return
    if logical_line.startswith(('def ', 'class ', '@')):
        if not indent_level:
            if blank_lines != 2:
                yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
        elif not (blank_lines or previous_indent_level < indent_level or
                  DOCSTRING_REGEX.match(previous_logical)):
            # A nested definition following code at the same level needs
            # one separating blank line.
            yield 0, "E301 expected 1 blank line, found 0"
| 273 | |
| 274 | |
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    # Each match is two characters: either "bracket + space" (E201)
    # or "space + closer/punctuation" (E202/E203).
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text.endswith(' '):
            # A space right after an opening bracket.
            yield found + 1, "E201 whitespace after '%s'" % char
        elif logical_line[found - 1] != ',':
            # A preceding comma is handled by the E241/E242 checks instead.
            code = 'E202' if char in '}])' else 'E203'
            yield found, "%s whitespace before '%s'" % (code, char)
| 306 | |
| 307 | |
def whitespace_around_keywords(logical_line):
    r"""
    Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for match in KEYWORD_REGEX.finditer(logical_line):
        before, after = match.groups()
        # Apply the same tab / multiple-space test to both sides of the
        # keyword; only the group index and messages differ.
        checks = (
            (1, before, "E274 tab before keyword",
             "E272 multiple spaces before keyword"),
            (2, after, "E273 tab after keyword",
             "E271 multiple spaces after keyword"),
        )
        for group, gap, tab_message, spaces_message in checks:
            if '\t' in gap:
                yield match.start(group), tab_message
            elif len(gap) > 1:
                yield match.start(group), spaces_message
| 330 | |
| 331 | |
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, char in enumerate(line[:-1]):
        if char not in ',;:' or line[index + 1] in WHITESPACE:
            continue
        before = line[:index]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            # Inside an open '[' that is not a dict literal: slice syntax,
            # e.g. a[1:4], where no space is required.
            continue
        if char == ',' and line[index + 1] == ')':
            # Allow a tuple with only one element: (3,)
            continue
        yield index, "E231 missing whitespace after '%s'" % char
| 357 | |
| 358 | |
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_char == ' ' and indent_level % 4 != 0:
        yield 0, "E111 indentation is not a multiple of four"
    # A previous line ending in ':' opens a block and must be followed by
    # a deeper indent; anything else must not be.
    expected_indent = previous_logical.endswith(':')
    indented = indent_level > previous_indent_level
    if expected_indent and not indented:
        yield 0, "E112 expected an indented block"
    if indented and not expected_indent:
        yield 0, "E113 unexpected indentation"
| 384 | |
| 385 | |
def continuation_line_indentation(logical_line, tokens, indent_level, verbose):
    r"""
    Continuation lines should align wrapped elements either vertically using
    Python's implicit line joining inside parentheses, brackets and braces, or
    using a hanging indent.

    When using a hanging indent the following considerations should be applied:

    - there should be no arguments on the first line, and

    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (a or\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if nrows == 1 or noqa(tokens[0][4]):
        # The logical line occupies a single physical line (or carries a
        # "# noqa" marker): there are no continuation lines to check.
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # visual indents: maps a start column to True (confirmed visual
    # indent), to str (column made plausible by a string/comment, for
    # implicit concatenation), or to the token text that started there.
    indent_chances = {}
    last_indent = tokens[0][2]
    # indent[d] is the confirmed visual-indent column at bracket depth d
    # (0 while unknown); indent[0] is the logical line's own column.
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # A row reached only via a multiline token (or an NL/NEWLINE)
            # is not the start of a checkable continuation line.
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            if depth:
                # a bracket expression in a continuation line.
                # find the line that it was opened on
                for open_row in range(row - 1, -1, -1):
                    if parens[open_row]:
                        break
            else:
                # an unbracketed continuation line (ie, backslash)
                open_row = 0
            # hang is this line's indent relative to the opening line.
            hang = rel_indent[row] - rel_indent[open_row]
            visual_indent = indent_chances.get(start[1])

            if token_type == tokenize.OP and text in ']})':
                # this line starts with a closing bracket
                if indent[depth]:
                    if start[1] != indent[depth]:
                        yield (start, "E124 closing bracket does not match "
                               "visual indentation")
                elif hang:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
            elif visual_indent is True:
                # visual indent is verified
                if not indent[depth]:
                    indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a previous line
                pass
            elif indent[depth] and start[1] < indent[depth]:
                # visual indent is broken
                yield (start, "E128 continuation line "
                       "under-indented for visual indent")
            elif hang == 4 or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif hang % 4:
                    error = "E121", "indentation is not a multiple of four"
                else:
                    error = "E126", "over-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # columns at or beyond the closed bracket are no longer
                # valid alignment targets
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # release the innermost still-open bracket count
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])

    if indent_next and rel_indent[-1] == 4:
        yield (last_indent, "E125 continuation line does not distinguish "
               "itself from next logical line")
| 549 | |
| 550 | |
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text = tokens[0][0], tokens[0][1]
    prev_end = tokens[0][3]
    for index in range(1, len(tokens)):
        token_type, text, start, end = tokens[index][:4]
        # A gap before the bracket shows up as start != prev_end.
        gapped_bracket = (token_type == tokenize.OP and text in '([' and
                          start != prev_end)
        follows_value = (prev_type == tokenize.NAME or prev_text in '}])')
        if (gapped_bracket and follows_value and
                # Syntax "class A (B):" is allowed, but avoid it
                (index < 2 or tokens[index - 2][1] != 'class') and
                # Allow "return (a.foo for a in range(5))"
                not keyword.iskeyword(prev_text)):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
| 585 | |
| 586 | |
def whitespace_around_operator(logical_line):
    r"""
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for match in OPERATOR_REGEX.finditer(logical_line):
        before, after = match.groups()
        # Identical tab / multiple-space tests on both sides of the
        # operator; only the group index and messages differ.
        checks = (
            (1, before, "E223 tab before operator",
             "E221 multiple spaces before operator"),
            (2, after, "E224 tab after operator",
             "E222 multiple spaces after operator"),
        )
        for group, gap, tab_message, spaces_message in checks:
            if '\t' in gap:
                yield match.start(group), tab_message
            elif len(gap) > 1:
                yield match.start(group), spaces_message
| 612 | |
| 613 | |
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    parens = 0
    # need_space is the state machine driving this check:
    #   False            -> not currently after an operator
    #   True             -> mandatory-space operator seen; a space must follow
    #   None             -> optional-space operator seen (resolved below into
    #                       the tuple form before the next token is read)
    #   (pos, had_space) -> optional operator at pos; had_space records
    #                       whether a space preceded it, so the trailing
    #                       side must match the leading side
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3
            continue
        # 'lambda' counts like '(' so its default-argument '=' is allowed.
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # Optional operator had no leading space but does have
                    # a trailing one: asymmetric -> E225.
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # Symmetric but missing space around an optional-space
                    # operator: pick the code by operator family.
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP:
                    binary_usage = (prev_text in '}])')
                elif prev_type == tokenize.NAME:
                    binary_usage = (prev_text not in KEYWORDS)
                else:
                    binary_usage = (prev_type not in SKIP_TOKENS)

                if binary_usage:
                    if text in WS_OPTIONAL_OPERATORS:
                        need_space = None
                    else:
                        need_space = True
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
| 712 | |
| 713 | |
def whitespace_around_comma(logical_line):
    r"""
    Avoid extraneous whitespace after a comma, semicolon or colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for match in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        punctuation = match.group()[0]
        # Report the position just after the punctuation character.
        found = match.start() + 1
        if '\t' in match.group():
            yield found, "E242 tab after '%s'" % punctuation
        else:
            yield found, "E241 multiple spaces after '%s'" % punctuation
| 734 | |
| 735 | |
def whitespace_around_named_parameter_equals(logical_line, tokens):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    message = "E251 unexpected spaces around keyword / parameter equals"
    depth = 0
    expect_no_space = False
    prev_end = None
    for token_type, text, start, end, line in tokens:
        if expect_no_space:
            # The token right after a parameter '=' must be adjacent to it.
            expect_no_space = False
            if start != prev_end:
                yield (prev_end, message)
        elif token_type == tokenize.OP:
            if text == '(':
                depth += 1
            elif text == ')':
                depth -= 1
            elif depth and text == '=':
                # '=' inside parentheses is a keyword/default assignment;
                # check the leading side now and the trailing side on the
                # next token.
                expect_no_space = True
                if start != prev_end:
                    yield (prev_end, message)
        prev_end = end
| 770 | |
| 771 | |
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type != tokenize.COMMENT:
            # Track where the last code token ended (NL carries no code).
            if token_type != tokenize.NL:
                prev_end = end
            continue
        if not line[:start[1]].strip():
            # The comment occupies the whole line: not an inline comment.
            continue
        same_row = (prev_end[0] == start[0])
        if same_row and start[1] < prev_end[1] + 2:
            yield (prev_end,
                   "E261 at least two spaces before inline comment")
        symbol, _, rest = text.partition(' ')
        if symbol not in ('#', '#:') or rest[:1].isspace():
            yield start, "E262 inline comment should start with '# '"
| 799 | |
| 800 | |
def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma_pos = logical_line.find(',')
    # A semicolon before the comma means the comma belongs to a second
    # statement on the same line, not to this import.
    if comma_pos > -1 and ';' not in logical_line[:comma_pos]:
        yield comma_pos, "E401 multiple imports on one line"
| 819 | |
| 820 | |
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    """
    line = logical_line
    last = len(line) - 1
    colon = line.find(':')
    if -1 < colon < last:
        head = line[:colon]
        # A colon only separates statements when it is not inside a
        # dict/slice/annotation bracket pair and not part of a lambda.
        balanced = (head.count('{') <= head.count('}') and
                    head.count('[') <= head.count(']') and
                    head.count('(') <= head.count(')'))
        if balanced and not LAMBDA_REGEX.search(head):
            yield colon, "E701 multiple statements on one line (colon)"
    semicolon = line.find(';')
    if semicolon > -1:
        if semicolon < last:
            yield semicolon, "E702 multiple statements on one line (semicolon)"
        else:
            yield semicolon, "E703 statement ends with a semicolon"
| 863 | |
| 864 | |
def explicit_line_join(logical_line, tokens):
    r"""
    Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    prev_start = prev_end = depth = 0
    backslash = None
    for token_type, text, start, end, line in tokens:
        # A token starting on a new physical row, inside brackets, right
        # after a backslash continuation: the backslash was redundant.
        if start[0] != prev_start and depth and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # Entering a new physical line: record the position of a
            # trailing backslash, if any.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
            elif text in ')]}':
                depth -= 1
| 898 | |
| 899 | |
def comparison_to_singleton(logical_line):
    """
    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value.  The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    is_equality = (match.group(1) == '==')
    singleton = match.group(2)
    suggestion = "'if cond is %s:'" % (
        ('' if is_equality else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        truthy = ((singleton == 'True' and is_equality) or
                  (singleton == 'False' and not is_equality))
        suggestion += " or 'if %scond:'" % ('' if truthy else 'not ')
    yield match.start(1), ("%s comparison to %s should be %s" %
                           (code, singleton, suggestion))
| 928 | |
| 929 | |
def comparison_type(logical_line):
    """
    Object type comparisons should always use isinstance() instead of
    comparing types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too!  In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    inst = match.group(1)
    # Comparing the types of two arbitrary variables may be intentional,
    # so only flag comparisons against obvious literals and singletons.
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return
    yield match.start(0), "E721 do not compare types, use 'isinstance()'"
| 951 | |
| 952 | |
def python_3000_has_key(logical_line):
    r"""
    The {}.has_key() method is removed in the Python 3.
    Use the 'in' operation instead.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    pos = logical_line.find('.has_key(')
    if pos != -1:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
| 964 | |
| 965 | |
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    # The three-argument re-raise form is handled elsewhere: skip it here.
    if RERAISE_COMMA_REGEX.match(logical_line):
        return
    yield match.start(1), "W602 deprecated form of raising exception"
| 982 | |
| 983 | |
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only.  New code should always use !=.
    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    pos = logical_line.find('<>')
    if pos != -1:
        yield pos, "W603 '<>' is deprecated, use '!='"
| 996 | |
| 997 | |
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3.
    Use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    pos = logical_line.find('`')
    if pos != -1:
        yield pos, "W604 backticks are deprecated, use 'repr()'"
| 1009 | |
| 1010 | |
| 1011 | ############################################################################## |
| 1012 | # Helper functions |
| 1013 | ############################################################################## |
| 1014 | |
| 1015 | |
# Python 2/3 compatibility shims: pick the right implementation of
# readlines(), isidentifier() and stdin_get_value() for this interpreter.
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        # Read the file as raw lines; Python 2 strings are byte strings,
        # so no explicit decoding step is needed here.
        f = open(filename)
        try:
            return f.readlines()
        finally:
            f.close()

    # Python 2 strings have no isidentifier() method; emulate it with a
    # regex match (note: match() only checks the start of the string).
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        # Open in binary first so the PEP 263 coding cookie can be
        # detected, then re-wrap the stream with the declared encoding.
        f = open(filename, 'rb')
        try:
            coding, lines = tokenize.detect_encoding(f.readline)
            f = TextIOWrapper(f, coding, line_buffering=True)
            # detect_encoding() already consumed up to two lines; decode
            # those and append the rest of the file.
            return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            f.close()
            # Fall back if files are improperly declared
            f = open(filename, encoding='latin-1')
            return f.readlines()
        finally:
            f.close()

    isidentifier = str.isidentifier

    def stdin_get_value():
        # Read stdin as text, ignoring undecodable bytes.
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
readlines.__doc__ = "    Read the source code."
# Matcher for the '# noqa' / '# nopep8' marker that silences a line.
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
| 1049 | |
| 1050 | |
def expand_indent(line):
    r"""
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('    \t')
    8
    >>> expand_indent('        \t')
    16
    """
    # Fast path: without tabs the indent is just the stripped prefix.
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == '\t':
            # Jump to the next multiple of 8.
            width = (width // 8 + 1) * 8
        elif char == ' ':
            width += 1
        else:
            break
    return width
| 1078 | |
| 1079 | |
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    quote = text[-1]
    # Skip string modifiers (e.g. u or r) before the opening quote.
    body_start = text.index(quote) + 1
    body_end = len(text) - 1
    # Widen the quote span for triple-quoted strings.
    if text[-3:] in ('"""', "'''"):
        body_start += 2
        body_end -= 2
    return ''.join([text[:body_start],
                    'x' * (body_end - body_start),
                    text[body_end:]])
| 1099 | |
| 1100 | |
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # For each file of the diff, the entry key is the filename,
    # and the value is a set of row numbers to consider.
    rv = {}
    path = nrows = None
    for line in diff.splitlines():
        if nrows:
            # Still inside a hunk: every line that is not a removal
            # ('-') counts against the expected number of target rows.
            if line[:1] != '-':
                nrows -= 1
            continue
        if line[:3] == '@@ ':
            # Hunk header: record the target file's row range.
            hunk_match = HUNK_REGEX.match(line)
            row, nrows = [int(g or '1') for g in hunk_match.groups()]
            rv[path].update(range(row, row + nrows))
        elif line[:3] == '+++':
            # New-file header: strip the 'b/' prefix added by git.
            path = line[4:].split('\t', 1)[0]
            if path[:2] == 'b/':
                path = path[2:]
            rv[path] = set()
    # Keep only files that have changed rows and match the patterns.
    return dict([(os.path.join(parent, path), rows)
                 for (path, rows) in rv.items()
                 if rows and filename_match(path, patterns)])
| 1124 | |
| 1125 | |
def filename_match(filename, patterns, default=True):
    """
    Check if patterns contains a pattern that matches filename.
    If patterns is unspecified, this always returns True.
    """
    if patterns:
        return any(fnmatch(filename, pattern) for pattern in patterns)
    return default
| 1134 | |
| 1135 | |
| 1136 | ############################################################################## |
| 1137 | # Framework to run all checks |
| 1138 | ############################################################################## |
| 1139 | |
| 1140 | |
# Registry of check functions, keyed by the kind of input they inspect:
# a physical line, a logical line, or the whole parse tree.  Each entry
# maps the check object to its (error codes, argument names) tuple.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
| 1142 | |
| 1143 | |
def register_check(check, codes=None):
    """
    Register a new check object.
    """
    def _add_check(check, kind, codes, args):
        # Merge the codes if the same check is registered twice.
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        # NOTE(review): inspect.getargspec is deprecated in Python 3 and
        # removed in 3.11; it is kept here for Python 2 compatibility.
        args = inspect.getargspec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                # Harvest the E/W error codes from the docstring.
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        # AST checkers are classes constructed with (tree, filename).
        if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
| 1162 | |
| 1163 | |
def init_checks_registry():
    """
    Register all globally visible functions where the first argument name
    is 'physical_line' or 'logical_line'.
    """
    mod = inspect.getmodule(register_check)
    for (name, function) in inspect.getmembers(mod, inspect.isfunction):
        register_check(function)
# Populate the registry once, at import time.
init_checks_registry()
| 1173 | |
| 1174 | |
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.
    """

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        if options is None:
            # Build default options from the loose keyword arguments.
            options = StyleGuide(kwargs).options
        else:
            # 'options' and loose keyword arguments are mutually exclusive.
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.verbose = options.verbose
        self.filename = filename
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                # Defer the error: it is reported as E902 when
                # generate_tokens() runs.
                exc_type, exc = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        # Report the pending SyntaxError/TokenError as E901.
        exc_type, exc = sys.exc_info()[:2]
        offset = exc.args[1]
        if len(offset) > 2:
            # Looks like a SyntaxError detail tuple (filename, lineno,
            # offset, text): keep only (lineno, offset).
            offset = offset[1:3]
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)
    report_invalid_syntax.__doc__ = "    Check if the syntax is valid."

    def readline(self):
        """
        Get the next line from the input buffer.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            # Past the end: an empty string signals EOF to the tokenizer.
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.
        """
        # Gather the checker attributes named by the plugin's argument
        # list (e.g. 'logical_line', 'tokens', 'indent_level', ...).
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # Remember the first indentation character seen in the file.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        for name, check, argument_names in self._physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                # Hide string contents so checks don't match inside them.
                text = mute_string(text)
            if previous:
                end_row, end = previous[3]
                start_row, start = token[2]
                if end_row != start_row:    # different row
                    prev_text = self.lines[end_row - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[(' and
                                            text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    # Preserve the whitespace between the two tokens.
                    fill = self.lines[end_row - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            # Record each token's offset in the logical line so that
            # check_logical() can map offsets back to (row, col).
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        # With Python 2, if the line ends with '\r\r\n' the assertion fails
        # assert self.logical_line.strip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        self.build_tokens_line()
        self.report.increment_logical_line()
        # Compute the indentation of the first physical line of the
        # logical line, for the indentation checks.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            for result in self.run_check(check, argument_names):
                offset, text = result
                if isinstance(offset, tuple):
                    # The check reported a (row, col) position directly.
                    orig_number, orig_offset = offset
                else:
                    # Map the logical-line offset back to a physical
                    # (row, col) position via the token mapping.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            orig_number = token[2][0]
                            orig_offset = (token[2][1] + offset - token_offset)
                self.report_error(orig_number, orig_offset, text, check)
        self.previous_logical = self.logical_line

    def check_ast(self):
        """Compile the file and run the tree (AST) checks on it."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except SyntaxError:
            return self.report_invalid_syntax()
        for name, cls, _ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Honor the '# noqa' marker on the offending line.
                if not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, reporting tokenizer failures as E901."""
        if self._io_error:
            # The file could not be read in __init__: report it now.
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline_check_physical)
        try:
            for token in tokengen:
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.
        """
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = blank_lines_before_comment = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                # Track bracket depth: a logical line only ends at a
                # NEWLINE outside of any brackets.
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type == tokenize.NEWLINE:
                    if self.blank_lines < blank_lines_before_comment:
                        self.blank_lines = blank_lines_before_comment
                    self.check_logical()
                    self.tokens = []
                    self.blank_lines = blank_lines_before_comment = 0
                elif token_type == tokenize.NL:
                    if len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                    self.tokens = []
                elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
                    # Remember the blank-line count before a standalone
                    # comment; it is restored at the next NEWLINE above.
                    if blank_lines_before_comment < self.blank_lines:
                        blank_lines_before_comment = self.blank_lines
                    self.blank_lines = 0
                    if COMMENT_WITH_NL:
                        # The comment also ends a physical line
                        self.tokens = []
        return self.report.get_file_results()
| 1394 | |
| 1395 | |
| 1396 | class BaseReport(object): |
| 1397 | """Collect the results of the checks.""" |
| 1398 | print_filename = False |
| 1399 | |
| 1400 | def __init__(self, options): |
| 1401 | self._benchmark_keys = options.benchmark_keys |
| 1402 | self._ignore_code = options.ignore_code |
| 1403 | # Results |
| 1404 | self.elapsed = 0 |
| 1405 | self.total_errors = 0 |
| 1406 | self.counters = dict.fromkeys(self._benchmark_keys, 0) |
| 1407 | self.messages = {} |
| 1408 | |
| 1409 | def start(self): |
| 1410 | """Start the timer.""" |
| 1411 | self._start_time = time.time() |
| 1412 | |
| 1413 | def stop(self): |
| 1414 | """Stop the timer.""" |
| 1415 | self.elapsed = time.time() - self._start_time |
| 1416 | |
| 1417 | def init_file(self, filename, lines, expected, line_offset): |
| 1418 | """Signal a new file.""" |
| 1419 | self.filename = filename |
| 1420 | self.lines = lines |
| 1421 | self.expected = expected or () |
| 1422 | self.line_offset = line_offset |
| 1423 | self.file_errors = 0 |
| 1424 | self.counters['files'] += 1 |
| 1425 | self.counters['physical lines'] += len(lines) |
| 1426 | |
| 1427 | def increment_logical_line(self): |
| 1428 | """Signal a new logical line.""" |
| 1429 | self.counters['logical lines'] += 1 |
| 1430 | |
| 1431 | def error(self, line_number, offset, text, check): |
| 1432 | """Report an error, according to options.""" |
| 1433 | code = text[:4] |
| 1434 | if self._ignore_code(code): |
| 1435 | return |
| 1436 | if code in self.counters: |
| 1437 | self.counters[code] += 1 |
| 1438 | else: |
| 1439 | self.counters[code] = 1 |
| 1440 | self.messages[code] = text[5:] |
| 1441 | # Don't care about expected errors or warnings |
| 1442 | if code in self.expected: |
| 1443 | return |
| 1444 | if self.print_filename and not self.file_errors: |
| 1445 | print(self.filename) |
| 1446 | self.file_errors += 1 |
| 1447 | self.total_errors += 1 |
| 1448 | return code |
| 1449 | |
| 1450 | def get_file_results(self): |
| 1451 | """Return the count of errors and warnings for this file.""" |
| 1452 | return self.file_errors |
| 1453 | |
| 1454 | def get_count(self, prefix=''): |
| 1455 | """Return the total count of errors and warnings.""" |
| 1456 | return sum([self.counters[key] |
| 1457 | for key in self.messages if key.startswith(prefix)]) |
| 1458 | |
| 1459 | def get_statistics(self, prefix=''): |
| 1460 | """ |
| 1461 | Get statistics for message codes that start with the prefix. |
| 1462 | |
| 1463 | prefix='' matches all errors and warnings |
| 1464 | prefix='E' matches all errors |
| 1465 | prefix='W' matches all warnings |
| 1466 | prefix='E4' matches all errors that have to do with imports |
| 1467 | """ |
| 1468 | return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) |
| 1469 | for key in sorted(self.messages) if key.startswith(prefix)] |
| 1470 | |
| 1471 | def print_statistics(self, prefix=''): |
| 1472 | """Print overall statistics (number of errors and warnings).""" |
| 1473 | for line in self.get_statistics(prefix): |
| 1474 | print(line) |
| 1475 | |
| 1476 | def print_benchmark(self): |
| 1477 | """Print benchmark numbers.""" |
| 1478 | print('%-7.2f %s' % (self.elapsed, 'seconds elapsed')) |
| 1479 | if self.elapsed: |
| 1480 | for key in self._benchmark_keys: |
| 1481 | print('%-7d %s per second (%d total)' % |
| 1482 | (self.counters[key] / self.elapsed, key, |
| 1483 | self.counters[key])) |
| 1484 | |
| 1485 | |
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Only the flag is overridden: BaseReport.error() prints
    # self.filename once, before the first error of each file.
    print_filename = True
| 1489 | |
| 1490 | |
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # Resolve a symbolic format name ('default', ...) if given.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code:
            # Queue each code once per file, unless --repeat was given.
            if self.counters[code] == 1 or self._repeat:
                self._deferred_print.append(
                    (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for row, col, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + row, 'col': col + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                # Show the offending line with a caret under the error.
                source = self.lines[row - 1] if row <= len(self.lines) else ''
                print(source.rstrip())
                print(' ' * col + '^')
            if self._show_pep8 and doc:
                print(doc.lstrip('\n').rstrip())
        return self.file_errors
| 1536 | |
| 1537 | |
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines that the diff actually changed.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
| 1549 | |
| 1550 | |
| 1551 | class StyleGuide(object): |
| 1552 | """Initialize a PEP-8 instance with few options.""" |
| 1553 | |
    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        options, self.paths = process_options(
            parse_argv=parse_argv, config_file=config_file, parser=parser)
        if args or kwargs:
            # build options from dict
            options_dict = dict(*args, **kwargs)
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        # Normalize exclude patterns: no trailing slash.
        for index, value in enumerate(options.exclude):
            options.exclude[index] = value.rstrip('/')
        # Ignore all checks which are not explicitly selected
        options.select = tuple(options.select or ())
        # When only 'select' is given, ignore everything else: the empty
        # string is a prefix of every code.
        options.ignore = tuple(options.ignore or options.select and ('',))
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()
| 1586 | |
| 1587 | def init_report(self, reporter=None): |
| 1588 | """Initialize the report instance.""" |
| 1589 | self.options.report = (reporter or self.options.reporter)(self.options) |
| 1590 | return self.options.report |
| 1591 | |
| 1592 | def check_files(self, paths=None): |
| 1593 | """Run all checks on the paths.""" |
| 1594 | if paths is None: |
| 1595 | paths = self.paths |
| 1596 | report = self.options.report |
| 1597 | runner = self.runner |
| 1598 | report.start() |
| 1599 | try: |
| 1600 | for path in paths: |
| 1601 | if os.path.isdir(path): |
| 1602 | self.input_dir(path) |
| 1603 | elif not self.excluded(path): |
| 1604 | runner(path) |
| 1605 | except KeyboardInterrupt: |
| 1606 | print('... stopped') |
| 1607 | report.stop() |
| 1608 | return report |
| 1609 | |
| 1610 | def input_file(self, filename, lines=None, expected=None, line_offset=0): |
| 1611 | """Run all checks on a Python source file.""" |
| 1612 | if self.options.verbose: |
| 1613 | print('checking %s' % filename) |
| 1614 | fchecker = self.checker_class( |
| 1615 | filename, lines=lines, options=self.options) |
| 1616 | return fchecker.check_all(expected=expected, line_offset=line_offset) |
| 1617 | |
    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Prune excluded subdirectories in place, so os.walk
            # does not descend into them (iterate over a sorted copy
            # while mutating 'dirs').
            for subdir in sorted(dirs):
                if self.excluded(os.path.join(root, subdir)):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename))):
                    runner(os.path.join(root, filename))
| 1639 | |
| 1640 | def excluded(self, filename): |
| 1641 | """ |
| 1642 | Check if options.exclude contains a pattern that matches filename. |
| 1643 | """ |
| 1644 | basename = os.path.basename(filename) |
| 1645 | return any((filename_match(filename, self.options.exclude, |
| 1646 | default=False), |
| 1647 | filename_match(basename, self.options.exclude, |
| 1648 | default=False))) |
| 1649 | |
| 1650 | def ignore_code(self, code): |
| 1651 | """ |
| 1652 | Check if the error code should be ignored. |
| 1653 | |
| 1654 | If 'options.select' contains a prefix of the error code, |
| 1655 | return False. Else, if 'options.ignore' contains a prefix of |
| 1656 | the error code, return True. |
| 1657 | """ |
| 1658 | return (code.startswith(self.options.ignore) and |
| 1659 | not code.startswith(self.options.select)) |
| 1660 | |
| 1661 | def get_checks(self, argument_name): |
| 1662 | """ |
| 1663 | Find all globally visible functions where the first argument name |
| 1664 | starts with argument_name and which contain selected tests. |
| 1665 | """ |
| 1666 | checks = [] |
| 1667 | for check, attrs in _checks[argument_name].items(): |
| 1668 | (codes, args) = attrs |
| 1669 | if any(not (code and self.ignore_code(code)) for code in codes): |
| 1670 | checks.append((check.__name__, check, args)) |
| 1671 | return sorted(checks) |
| 1672 | |
| 1673 | |
def get_parser(prog='pep8', version=__version__):
    """Build and return the OptionParser for the command-line interface.

    The parser carries an extra ``config_options`` attribute listing the
    option names that may also be set from a configuration file section.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Options that read_config() is allowed to pick up from a config file.
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length', 'count',
        'format', 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    # --repeat is kept only for backward compatibility; --first disables it.
    parser.add_option('-r', '--repeat', default=True, action='store_true',
                      help="(obsolete) show all occurrences of the same error")
    parser.add_option('--first', action='store_false', dest='repeat',
                      help="show first occurrence of each error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                           "comma separated patterns (default: %default)")
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                           "matching these comma separated patterns "
                           "(default: %default)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error "
                           "(implies --first)")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                           "to standard error and set exit code to 1 if "
                           "total is not null")
    parser.add_option('--max-line-length', type='int', metavar='n',
                      default=MAX_LINE_LENGTH,
                      help="set maximum allowed line length "
                           "(default: %default)")
    parser.add_option('--format', metavar='format', default='default',
                      help="set the error format [default|pylint|<custom>]")
    parser.add_option('--diff', action='store_true',
                      help="report only lines changed according to the "
                           "unified diff received on STDIN")
    # Testing options are only exposed when running from a source checkout
    # that includes the test suite directory.
    group = parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
| 1728 | |
| 1729 | |
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration.

    The user config file (``options.config``) is read first, then the
    nearest project config file found by walking up from the common
    prefix of *args*.  Values from the config files become the new
    defaults, and the command line is re-parsed on top of them.
    """
    config = RawConfigParser()

    # User-level configuration file, if one was given and exists.
    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    # Walk up the directory tree looking for a project configuration
    # file (the names listed in PROJECT_CONFIG, in priority order).
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        found = None
        for name in PROJECT_CONFIG:
            candidate = os.path.join(parent, name)
            if os.path.isfile(candidate):
                found = candidate
                break
        if found is None:
            parent, tail = os.path.split(parent)
            continue
        if options.verbose:
            print('local configuration: %s' % found)
        config.read(found)
        break

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option's destination to its type (or action, for
        # flags) so config values can be coerced correctly.
        option_list = dict((o.dest, o.type or o.action)
                           for o in parser.option_list)

        # First, read the default values
        new_options, _ = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if options.verbose > 1:
                print(" %s = %s" % (opt, config.get(pep8_section, opt)))
            if opt.replace('_', '-') not in parser.config_options:
                print("Unknown option: '%s'\n not in [%s]" %
                      (opt, ' '.join(parser.config_options)))
                sys.exit(1)
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        options, _ = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
| 1785 | |
| 1786 | |
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args.

    Returns the ``(options, args)`` pair produced by the option parser,
    after merging in configuration files and normalizing the comma
    separated list options (filename, exclude, select, ignore).
    """
    if not arglist and not parse_argv:
        # Don't read the command line if the module is used as a library.
        arglist = []
    if not parser:
        parser = get_parser()
    # Add the --config option lazily so a caller-supplied parser is not
    # modified twice.
    if not parser.has_option('--config'):
        if config_file is True:
            config_file = DEFAULT_CONFIG
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed. Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    options, args = parser.parse_args(arglist)
    options.reporter = None

    # ensure_value is used because --testsuite/--doctest only exist when
    # the test suite directory is present (see get_parser).
    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # With no explicit input, default to the current directory
            # only when a project config file suggests we are in a project.
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split the comma separated list options into real lists.
    if options.filename:
        options.filename = options.filename.split(',')
    options.exclude = options.exclude.split(',')
    if options.select:
        options.select = options.select.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')
    elif not (options.select or
              options.testsuite or options.doctest) and DEFAULT_IGNORE:
        # The default choice: ignore controversial checks
        # (for doctest and testsuite, all checks are required)
        options.ignore = DEFAULT_IGNORE.split(',')

    if options.diff:
        # Restrict the run to the files and lines present in the diff
        # read from standard input.
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
| 1840 | |
| 1841 | |
def _main():
    """Parse options and run checks on Python source.

    Exits with status 1 when any error was reported (and optionally
    prints the total count to stderr with --count).
    """
    pep8style = StyleGuide(parse_argv=True, config_file=True)
    options = pep8style.options
    if options.doctest or options.testsuite:
        # Temporarily put the test-suite directory first on sys.path so
        # ``test_pep8`` can be imported, then restore the path.
        sys.path.insert(0, TESTSUITE_PATH)
        from test_pep8 import run_tests
        del sys.path[0]
        report = run_tests(pep8style, options.doctest, options.testsuite)
    else:
        report = pep8style.check_files()
    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()
    if report.total_errors:
        if options.count:
            sys.stderr.write('%s\n' % report.total_errors)
        sys.exit(1)
| 1863 | |
# Allow running the checker directly: ``python pep8.py [options] input ...``
if __name__ == '__main__':
    _main()