blob: 5804d34734085c27434fb91486fa52d03d3e625f [file] [log] [blame]
erg@google.com720121a2012-05-11 16:31:47 +00001#!/usr/bin/python
erg@google.com4e00b9a2009-01-12 23:05:11 +00002#
erg@google.com8f91ab22011-09-06 21:04:45 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool. If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38# Suggestions
39# -----------
40# - Check for no 'explicit' for multi-arg ctor
41# - Check for boolean assign RHS in parens
42# - Check for ctor initializer-list colon position and spacing
43# - Check that if there's a ctor, there should be a dtor
44# - Check accessors that return non-pointer member variables are
45# declared const
46# - Check accessors that return non-const pointer member vars are
47# *not* declared const
48# - Check for using public includes for testing
49# - Check for spaces between brackets in one-line inline method
50# - Check for no assert()
51# - Check for spaces surrounding operators
52# - Check for 0 in pointer context (should be NULL)
53# - Check for 0 in char context (should be '\0')
54# - Check for camel-case method name conventions for methods
55# that are not simple inline getters and setters
erg@google.com4e00b9a2009-01-12 23:05:11 +000056# - Do not indent namespace contents
57# - Avoid inlining non-trivial constructors in header files
erg@google.com4e00b9a2009-01-12 23:05:11 +000058# - Check for old-school (void) cast for call-sites of functions
59# ignored return value
60# - Check gUnit usage of anonymous namespace
61# - Check for class declaration order (typedefs, consts, enums,
62# ctor(s?), dtor, friend declarations, methods, member vars)
63#
64
65"""Does google-lint on c++ files.
66
67The goal of this script is to identify places in the code that *may*
68be in non-compliance with google style. It does not attempt to fix
up these problems -- the point is to educate. It also does not
attempt to find all problems, or to ensure that everything it does
71find is legitimately a problem.
72
73In particular, we can get very confused by /* and // inside strings!
74We do a small hack, which is to ignore //'s with "'s after them on the
75same line, but it is far from perfect (in either direction).
76"""
77
78import codecs
erg@google.comd350fe52013-01-14 17:51:48 +000079import copy
erg@google.com4e00b9a2009-01-12 23:05:11 +000080import getopt
81import math # for log
82import os
83import re
84import sre_compile
85import string
86import sys
87import unicodedata
88
89
# Command-line help text for cpplint.  The synopsis lists every flag that is
# described in the "Flags" section below it, including --root.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing. Visual Studio
      compatible output (vs7) may also be used. Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".) Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn. When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_
"""
156
157# We categorize each error message we print. Here are the categories.
158# We want an explicit list so we can list them all in cpplint --filter=.
159# If you add a new error message with a new category, add it to the list
160# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.coma87abb82009-02-24 01:41:01 +0000161# \ used for clearer layout -- pylint: disable-msg=C6013
# Every category an error message may carry, grouped by top-level prefix.
# The order is the order in which the categories are listed to the user.
_ERROR_CATEGORIES = [
    # build/*
    'build/class', 'build/deprecated', 'build/endif_comment',
    'build/explicit_make_pair', 'build/forward_decl', 'build/header_guard',
    'build/include', 'build/include_alpha', 'build/include_order',
    'build/include_what_you_use', 'build/namespaces', 'build/printf_format',
    'build/storage_class',
    # legal/*
    'legal/copyright',
    # readability/*
    'readability/alt_tokens', 'readability/braces', 'readability/casting',
    'readability/check', 'readability/constructors', 'readability/fn_size',
    'readability/function', 'readability/multiline_comment',
    'readability/multiline_string', 'readability/namespace',
    'readability/nolint', 'readability/streams', 'readability/todo',
    'readability/utf8',
    # runtime/*
    'runtime/arrays', 'runtime/casting', 'runtime/explicit', 'runtime/int',
    'runtime/init', 'runtime/invalid_increment',
    'runtime/member_string_references', 'runtime/memset', 'runtime/operator',
    'runtime/printf', 'runtime/printf_format', 'runtime/references',
    'runtime/string', 'runtime/threadsafe_fn',
    # whitespace/*
    'whitespace/blank_line', 'whitespace/braces', 'whitespace/comma',
    'whitespace/comments', 'whitespace/empty_loop_body',
    'whitespace/end_of_line', 'whitespace/ending_newline',
    'whitespace/forcolon', 'whitespace/indent', 'whitespace/line_length',
    'whitespace/newline', 'whitespace/operators', 'whitespace/parens',
    'whitespace/semicolon', 'whitespace/tab', 'whitespace/todo',
    ]
erg@google.com4e00b9a2009-01-12 23:05:11 +0000222
# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.come35f7652009-06-19 20:52:09 +0000228
erg@google.com4e00b9a2009-01-12 23:05:11 +0000229# We used to check for high-bit characters, but after much discussion we
230# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com8a95ecc2011-09-08 00:45:54 +0000231# hard-coded international strings, which belong in a separate i18n file.
erg@google.com4e00b9a2009-01-12 23:05:11 +0000232
erg@google.com4e00b9a2009-01-12 23:05:11 +0000233
# C++ headers.  An include whose name appears in this set is treated as a C++
# system header; the set is immutable so it can be shared safely.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    # <scoped_allocator> belongs to the same standard table as its
    # neighbours here and was previously missing from this list.
    'scoped_allocator',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
369
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> more specific macro name}.
_CHECK_REPLACEMENT = dict((macro_name, {}) for macro_name in _CHECK_MACROS)

# Macros asserting that a condition holds: the replacement is named after the
# operator itself.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for _macro, _template in (('DCHECK', 'DCHECK_%s'),
                            ('CHECK', 'CHECK_%s'),
                            ('EXPECT_TRUE', 'EXPECT_%s'),
                            ('ASSERT_TRUE', 'ASSERT_%s'),
                            ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                            ('ASSERT_TRUE_M', 'ASSERT_%s_M')):
    _CHECK_REPLACEMENT[_macro][op] = _template % replacement

# Macros asserting that a condition does NOT hold: the replacement is named
# after the inverse operator.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for _macro, _template in (('EXPECT_FALSE', 'EXPECT_%s'),
                            ('ASSERT_FALSE', 'ASSERT_%s'),
                            ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                            ('ASSERT_FALSE_M', 'ASSERT_%s_M')):
    _CHECK_REPLACEMENT[_macro][op] = _template % inv_replacement
401
# Alternative tokens and their replacements. For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!=',
    }

# Compile regular expression that matches all the above keywords. The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
# The trailing lookahead requires a space, '(' or end of line after the
# keyword, so 'and' cannot match the front of a longer word.
#
# False positives include C-style multi-line comments (http://go/nsiut )
# and multi-line strings (http://go/beujw ), but those have always been
# troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
429
erg@google.com4e00b9a2009-01-12 23:05:11 +0000430
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0  # Outside of inline assembly block
_INSIDE_ASM = 1  # Inside inline assembly block
_END_ASM = 2  # Last line of inline assembly block
_BLOCK_ASM = 3  # The whole block is an inline assembly block

# Match start of assembly blocks: one of the asm keyword spellings at the
# start of the line, an optional volatile qualifier (captured as group 1),
# then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
449
erg@google.com4e00b9a2009-01-12 23:05:11 +0000450
# Maps a regexp pattern string to its compiled pattern object.  Filled lazily
# by Match(), ReplaceAll() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1 is the parenthesized
# category, including the parentheses, or None when NOLINT stands alone.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which every category is suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None
463
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression, if any, found on one line.

  Looks for the first NOLINT or NOLINT(category) marker in the line and
  registers the line number in the global error-suppression map.  A bare
  NOLINT or NOLINT(*) suppresses every category; an unrecognized category
  is reported through the error handler instead of being recorded.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  category = found.group(1)
  if category in (None, '(*)'):  # => "suppress all"
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if category.startswith('(') and category.endswith(')'):
    # Drop the surrounding parentheses to get the bare category name.
    category = category[1:-1]
    if category not in _ERROR_CATEGORIES:
      error(filename, linenum, 'readability/nolint', 5,
            'Unknown NOLINT error category: %s' % category)
    else:
      _error_suppressions.setdefault(category, set()).add(linenum)
erg+personal@google.com05189642010-04-30 20:43:03 +0000491
492
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global suppression map in place, so every category (including
  the "suppress all" entry) is forgotten.
  """
  _error_suppressions.clear()
496
497
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment silences this category on this line.

  Reads the global error_suppressions map populated by
  ParseNolintSuppressions/ResetNolintSuppressions.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  # A line is suppressed either for this specific category or, under the
  # None key, for all categories at once.
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
erg@google.com4e00b9a2009-01-12 23:05:11 +0000512
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern, anchored at the start of s by match().
    s: string to match against.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The cache handling is repeated in Match, ReplaceAll and Search on purpose:
  # factoring it into a shared helper turned out to be noticeably expensive.
  try:
    compiled = _regexp_compile_cache[pattern]
  except KeyError:
    compiled = sre_compile.compile(pattern)
    _regexp_compile_cache[pattern] = compiled
  return compiled.match(s)
521
522
def ReplaceAll(pattern, rep, s):
  """Substitutes every occurrence of a pattern in a string.

  The compiled regex is stored in the cache that Match and Search also use.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  try:
    compiled = _regexp_compile_cache[pattern]
  except KeyError:
    compiled = sre_compile.compile(pattern)
    _regexp_compile_cache[pattern] = compiled
  return compiled.sub(rep, s)
539
540
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern, looked for anywhere in s.
    s: string to search.

  Returns:
    The match object for the first occurrence, or None if there is none.
  """
  try:
    compiled = _regexp_compile_cache[pattern]
  except KeyError:
    compiled = sre_compile.compile(pattern)
    _regexp_compile_cache[pattern] = compiled
  return compiled.search(s)
546
547
class _IncludeState(dict):
  """Records where headers were included and validates their ordering.

  The object is itself a dict that maps an include filename to the line
  number on which that file was included.

  CheckNextIncludeOrder() is meant to be called once per header, in file
  order, with one of the header type constants defined above.  When a header
  shows up out of order, that call returns a non-empty error message.

  """
  # self._section only ever moves forward through these values.  A header
  # that would require moving backwards makes CheckNextIncludeOrder report
  # an error.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used when building the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    super(_IncludeState, self).__init__()
    # The section the most recent header put us in.
    self._section = self._INITIAL_SECTION
    # The path of the last header seen in the current section.
    self._last_header = ''

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Normalizes a header path so that paths can be compared alphabetically.

    - '-inl.h' becomes '.h', since -inl headers are not required to follow
      the main header.
    - "-" becomes "_" so both compare the same.
    - everything is lowercased, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    return without_inl.replace('-', '_').lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Tells whether a header sorts correctly after the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # _last_header is reset to the empty string whenever the section changes,
    # so the first header of a section always compares as in order.
    #
    # A blank line right before the include is taken to mean the author
    # grouped the headers deliberately, so no complaint is raised then.
    return not (self._last_header > header_path and
                not Match(r'^\s*$', clean_lines.elided[linenum - 1]))

  def CheckNextIncludeOrder(self, header_type):
    """Checks one header against the expected section order.

    Besides checking, this advances the internal section state so that the
    object is ready for the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    # Build the message first: it must name the section we were in before
    # this header was taken into account.
    out_of_order = ('Found %s after %s' %
                    (self._TYPE_NAMES[header_type],
                     self._SECTION_NAMES[self._section]))
    previous_section = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted = self._C_SECTION
      else:
        wanted = self._CPP_SECTION
      if previous_section > wanted:
        # System header after a later section: report it and restart the
        # alphabetical tracking.
        self._last_header = ''
        return out_of_order
      self._section = wanted
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      if previous_section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        # Past the "my header" slot, the header is simply counted as an
        # ordinary header rather than flagged.
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    if self._section != previous_section:
      self._last_header = ''
    return ''
680
681
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.  The new list is validated before it is installed, so a
    rejected argument leaves the current filters untouched.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    new_filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        new_filters.append(clean_filt)
    for filt in new_filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    self.filters = new_filters

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: str, the full category of the error (eg "build/class").
        Per-category counts are only kept when counting is 'toplevel' (keyed
        by the part before the first '/') or 'detailed' (keyed by the full
        category).
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # items() rather than iteritems(): it exists on both Python 2 and 3.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000758
# The single module-wide state object; the module-level helper functions
# below read and update it.
_cpplint_state = _CppLintState()
760
761
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format value currently stored on the module-wide state.
  """
  return _cpplint_state.output_format
765
766
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format name to store on the module-wide state.
  """
  _cpplint_state.SetOutputFormat(output_format)
770
771
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level value currently stored on the module-wide state.
  """
  return _cpplint_state.verbose_level
775
776
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
780
781
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style to store on the module-wide state; despite the
      parameter name, it is passed on as the counting style.
  """
  _cpplint_state.SetCountingStyle(level)
785
786
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list returned is the one held by the module-wide state, not a copy.
  """
  return _cpplint_state.filters
790
791
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
803
804
class _FunctionState(object):
  """Tracks current function name and the number of lines in its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Start analyzing function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.in_a_function = True
    self.lines_in_function = 0
    self.current_function = function_name

  def Count(self):
    """Count line in current function body.

    Lines are only counted while a function is being tracked.
    """
    if self.in_a_function:
      self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    else:
      base_trigger = self._NORMAL_TRIGGER
    # Every verbosity step doubles the allowed length.
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function > trigger:
      # Floor division keeps the ratio an integer on Python 2 and 3 alike.
      # base_trigger lines => 0, 2x => 1, 4x => 2, 8x => 3, ... capped at 5.
      error_level = int(math.log(self.lines_in_function // base_trigger, 2))
      error_level = min(error_level, 5)
      error(filename, linenum, 'readability/fn_size', error_level,
            'Small and focused functions are preferred:'
            ' %s has %d non-comment lines'
            ' (error triggered by exceeding %d lines).' % (
                self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False
859
860
class _IncludeError(Exception):
  """Indicates a problem with the include order in a file."""
864
865
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  @staticmethod
  def _HasVcsDir(directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return (os.path.exists(os.path.join(directory, ".git")) or
            os.path.exists(os.path.join(directory, ".hg")) or
            os.path.exists(os.path.join(directory, ".svn")))

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file with backslashes replaced by slashes.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or FullName() when the
      file does not exist or no checkout root can be found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we walk up the
        # directory tree for the top of the SVN checkout.
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # one_up_dir == root_dir only at the filesystem root; stop there so
        # a .svn entry at the root cannot make this loop forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = one_up_dir
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = project_dir
      while (root_dir != os.path.dirname(root_dir) and
             not self._HasVcsDir(root_dir)):
        root_dir = os.path.dirname(root_dir)

      if self._HasVcsDir(root_dir):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path of the file with the extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
954
955
def _ShouldPrintError(category, confidence, linenum):
  """If confidence >= verbose, category passes filter and is not suppressed.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be reported, False otherwise.
  """

  # There are three ways we might decide not to print an error message:
  # a "NOLINT(category)" comment appears in the source,
  # the verbosity level isn't high enough, or the filters filter it out.
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so the last matching one wins.
  is_filtered = False
  for one_filter in _Filters():
    if one_filter.startswith('-'):
      if category.startswith(one_filter[1:]):
        is_filtered = True
    elif one_filter.startswith('+'):
      if category.startswith(one_filter[1:]):
        is_filtered = False
    else:
      assert False  # should have been checked for in SetFilters.

  return not is_filtered
981
982
def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  We log where the error was found, and also our confidence in the error,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  False positives can be suppressed by the use of
  "NOLINT(category)" comments on the offending line.  These are
  parsed into _error_suppressions.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  # NOTE(review): runs of whitespace appear collapsed in the listing this
  # was recovered from; confirm the spacing inside these format strings
  # against the canonical file.
  if _cpplint_state.output_format == 'vs7':
    line_format = '%s(%s): %s [%s] [%d]\n'
  elif _cpplint_state.output_format == 'eclipse':
    line_format = '%s:%s: warning: %s [%s] [%d]\n'
  else:
    line_format = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(line_format % (
      filename, linenum, message, category, confidence))
1016
1017
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches multi-line C++ comments.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of space removals tools so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
1038
1039
def IsCppString(line):
  """Tells whether text appended to line would be inside a string constant.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """

  line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
  # Count double quotes that are neither escaped (\") nor the character
  # literal '"'; an odd number means a string is still open.
  open_quotes = (line.count('"') - line.count(r'\"') - line.count("'\"'"))
  return open_quotes % 2 == 1
1055
1056
def FindNextMultiLineCommentStart(lines, lineix):
  """Find the beginning marker for a multiline comment.

  Args:
    lines: A list of strings, each representing a line of the file.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that starts with '/*'
    (ignoring surrounding whitespace) and does not close the comment on the
    same line, or len(lines) if there is none.
  """
  while lineix < len(lines):
    stripped = lines[lineix].strip()
    # Only return this marker if the comment goes beyond this line
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return lineix
    lineix += 1
  return len(lines)
1066
1067
def FindNextMultiLineCommentEnd(lines, lineix):
  """We are inside a comment, find the end marker.

  Args:
    lines: A list of strings, each representing a line of the file.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that ends with '*/'
    (ignoring surrounding whitespace), or len(lines) if there is none.
  """
  while lineix < len(lines):
    if lines[lineix].strip().endswith('*/'):
      return lineix
    lineix += 1
  return len(lines)
1075
1076
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Clears a range of lines for multi-line comments.

  Args:
    lines: A list of strings; modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # Having // dummy comments makes the lines non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  for i in range(begin, end):
    lines[i] = '// dummy'
1083
1084
def RemoveMultiLineComments(filename, lines, error):
  """Removes multiline (c-style) comments from lines.

  Args:
    filename: The name of the current file.
    lines: A list of strings, each representing a line of the file;
      modified in place.
    error: The function to call with any errors found.
  """
  lineix = 0
  while lineix < len(lines):
    lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
    if lineix_begin >= len(lines):
      return
    lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
    if lineix_end >= len(lines):
      # The error is reported one past the index of the opening line.
      error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
    lineix = lineix_end + 1
1099
1100
def CleanseComments(line):
  """Removes //-comments and single-line C-style /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  commentpos = line.find('//')
  # Only the first '//' is considered, and only if it is not inside a string.
  if commentpos != -1 and not IsCppString(line[:commentpos]):
    line = line[:commentpos].rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1115
1116
class CleansedLines(object):
  """Holds 3 copies of all lines with different preprocessing applied to them.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments, and
  3) raw_lines member contains all the lines without processing.
  All these three members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching _RE_PATTERN_INCLUDE are returned unchanged.
    if not _RE_PATTERN_INCLUDE.match(elided):
      # Remove escaped characters first to make quote/single quote collapsing
      # basic.  Things that look like escaped characters shouldn't occur
      # outside of strings and chars.
      elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
      elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
      elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    return elided
1160
1161
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    Index just after endchar, or -1 if the nesting level never returns to
    zero on this line.
  """
  # range() (not xrange()) so this runs on both Python 2 and Python 3.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return i + 1
  return -1
1183
1184
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  # Check first line
  end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
  if end_pos > -1:
    return (line, linenum, end_pos)

  # Not closed on the first line: track the balance of open brackets across
  # the following lines until one of them brings it down to zero.
  tail = line[pos:]
  num_open = tail.count(startchar) - tail.count(endchar)
  while linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    delta = line.count(startchar) - line.count(endchar)
    if num_open + delta <= 0:
      return (line, linenum,
              FindEndOfExpressionInLine(line, 0, num_open, startchar, endchar))
    num_open += delta

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001228
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  for line in lines[1:11]:
    if re.search(r'Copyright', line, re.I):
      return
  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1240
1241
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # Escape the prefix so regex metacharacters in the directory name
    # (e.g. '.') are matched literally.
    file_path_from_root = re.sub('^' + re.escape(_root + os.sep), '',
                                 file_path_from_root)
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com4e00b9a2009-01-12 23:05:11 +00001264
1265
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    linesplit = line.split()
    if len(linesplit) >= 2:
      # find the first occurrence of #ifndef and #define, save arg
      if not ifndef and linesplit[0] == '#ifndef':
        # set ifndef to the header guard presented on the #ifndef line.
        ifndef = linesplit[1]
        ifndef_linenum = linenum
      if not define and linesplit[0] == '#define':
        define = linesplit[1]
    # find the last occurrence of #endif, save entire line
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility.
  if ifndef != cppvar:
    error_level = 0
    if ifndef != cppvar + '_':
      error_level = 5

    # Pick up any NOLINT suppression on the #ifndef line before reporting.
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  # NOTE(review): runs of whitespace appear collapsed in the listing this
  # was recovered from; confirm the spacing inside the '#endif // %s'
  # literals below against the canonical file.
  if endif != ('#endif // %s' % cppvar):
    error_level = 0
    if endif != ('#endif // %s' % (cppvar + '_')):
      error_level = 5

    # Pick up any NOLINT suppression on the #endif line before reporting.
    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1339
1340
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Logs an error for each line containing Unicode replacement characters.

  These indicate that either the file contained invalid UTF-8 (likely)
  or Unicode replacement characters (which it shouldn't).  Note that
  it's possible for this to throw off line numbering if the invalid
  UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, line in enumerate(lines):
    if u'\ufffd' in line:
      error(filename, linenum, 'readability/utf8', 5,
            'Line contains invalid UTF-8 (or Unicode replacement character).')
1358
1359
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # To verify that the file ends in \n, we just have to make sure the
  # last-but-two element of lines() exists and is empty.
  if len(lines) < 3 or lines[-2]:
    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1376
1377
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  line = line.replace('\\\\', '')

  # More comment openers than closers means a comment continues past this
  # line.
  if line.count('/*') > line.count('*/'):
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  if (line.count('"') - line.count('\\"')) % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1414
1415
# Pairs of (thread-unsafe call prefix, suggested replacement prefix) used by
# CheckPosixThreading.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1431
1432
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    # Only the first occurrence on the line is examined.
    ix = line.find(single_thread_function)
    # The match must not be the tail of a longer identifier or a member
    # access (preceded by an alphanumeric, '_', '.' or '>').
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
                                line[ix - 1] not in ('_', '.', '>'))):
      error(filename, linenum, 'runtime/threadsafe_fn', 2,
            'Consider using ' + multithread_safe_function +
            '...) instead of ' + single_thread_function +
            '...) for improved thread safety.')
1458
1459
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
1464
1465
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(line):
    error(filename, linenum, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
1486
1487
class _BlockInfo(object):
  """Stores information about a generic block of code."""

  def __init__(self, seen_open_brace):
    self.seen_open_brace = seen_open_brace
    self.open_parentheses = 0
    self.inline_asm = _NO_ASM

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text up to the opening brace.

    This is mostly for checking the text after the class identifier
    and the "{", usually where the base class is specified.  For other
    blocks, there isn't much to check, so we always pass.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text after the closing brace.

    This is mostly used for checking end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
1523
1524
class _ClassInfo(_BlockInfo):
  """Stores information about a class."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    # The initial access level is 'public' for a struct, 'private' otherwise.
    self.is_struct = class_or_struct == 'struct'
    if self.is_struct:
      self.access = 'public'
    else:
      self.access = 'private'

    # Remember initial indentation level for this class.  Using raw_lines here
    # instead of elided to account for leading comments like http://go/abjhm
    initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    if initial_indent:
      self.class_indent = len(initial_indent.group(1))
    else:
      self.class_indent = 0

    # Try to find the end of the class.  This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    depth = 0
    for i in range(linenum, clean_lines.NumLines()):
      line = clean_lines.elided[i]
      depth += line.count('{') - line.count('}')
      if not depth:
        self.last_line = i
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived if the line contains a bare ':'.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Look for a bare ':'
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the start of the class.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Check that closing brace is aligned with beginning of the class.
    # Only do this if the closing brace is indented by only whitespaces.
    # This means we will not check single-line class definitions.
    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
    if indent and len(indent.group(1)) != self.class_indent:
      if self.is_struct:
        parent = 'struct ' + self.name
      else:
        parent = 'class ' + self.name
      error(filename, linenum, 'whitespace/indent', 3,
            'Closing brace should be aligned with beginning of %s' % parent)
1579
erg@google.com4e00b9a2009-01-12 23:05:11 +00001580
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace.

  Attributes set here:
    name: Namespace name, or '' for an anonymous namespace.
    starting_linenum: Line number on which the namespace was opened.
  """

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    # Raw line: the terminating comment is exactly what is being inspected.
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace.  Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines.  However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    #
    # All patterns below start with '^\s*' because Match() anchors at the
    # first column: without it an indented closing brace (for example a
    # nested, indented namespace) could never match, and a perfectly valid
    # "  }  // namespace foo" would be reported as missing its comment.
    if (linenum - self.starting_linenum < 10
        and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
                    re.escape(self.name) + r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
1633
1634
1635class _PreprocessorInfo(object):
1636 """Stores checkpoints of nesting stacks when #if/#else is seen."""
1637
1638 def __init__(self, stack_before_if):
1639 # The entire nesting stack before #if
1640 self.stack_before_if = stack_before_if
1641
1642 # The entire nesting stack up to #else
1643 self.stack_before_else = []
1644
1645 # Whether we have already seen #else or #elif
1646 self.seen_else = False
1647
1648
1649class _NestingState(object):
1650 """Holds states related to parsing braces."""
erg@google.com4e00b9a2009-01-12 23:05:11 +00001651
1652 def __init__(self):
erg@google.comd350fe52013-01-14 17:51:48 +00001653 # Stack for tracking all braces. An object is pushed whenever we
1654 # see a "{", and popped when we see a "}". Only 3 types of
1655 # objects are possible:
1656 # - _ClassInfo: a class or struct.
1657 # - _NamespaceInfo: a namespace.
1658 # - _BlockInfo: some other type of block.
1659 self.stack = []
erg@google.com4e00b9a2009-01-12 23:05:11 +00001660
erg@google.comd350fe52013-01-14 17:51:48 +00001661 # Stack of _PreprocessorInfo objects.
1662 self.pp_stack = []
1663
1664 def SeenOpenBrace(self):
1665 """Check if we have seen the opening brace for the innermost block.
1666
1667 Returns:
1668 True if we have seen the opening brace, False if the innermost
1669 block is still expecting an opening brace.
1670 """
1671 return (not self.stack) or self.stack[-1].seen_open_brace
1672
1673 def InNamespaceBody(self):
1674 """Check if we are currently one level inside a namespace body.
1675
1676 Returns:
1677 True if top of the stack is a namespace block, False otherwise.
1678 """
1679 return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
1680
1681 def UpdatePreprocessor(self, line):
1682 """Update preprocessor stack.
1683
1684 We need to handle preprocessors due to classes like this:
1685 #ifdef SWIG
1686 struct ResultDetailsPageElementExtensionPoint {
1687 #else
1688 struct ResultDetailsPageElementExtensionPoint : public Extension {
1689 #endif
1690 (see http://go/qwddn for original example)
1691
1692 We make the following assumptions (good enough for most files):
1693 - Preprocessor condition evaluates to true from #if up to first
1694 #else/#elif/#endif.
1695
1696 - Preprocessor condition evaluates to false from #else/#elif up
1697 to #endif. We still perform lint checks on these lines, but
1698 these do not affect nesting stack.
1699
1700 Args:
1701 line: current line to check.
1702 """
1703 if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
1704 # Beginning of #if block, save the nesting stack here. The saved
1705 # stack will allow us to restore the parsing state in the #else case.
1706 self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
1707 elif Match(r'^\s*#\s*(else|elif)\b', line):
1708 # Beginning of #else block
1709 if self.pp_stack:
1710 if not self.pp_stack[-1].seen_else:
1711 # This is the first #else or #elif block. Remember the
1712 # whole nesting stack up to this point. This is what we
1713 # keep after the #endif.
1714 self.pp_stack[-1].seen_else = True
1715 self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
1716
1717 # Restore the stack to how it was before the #if
1718 self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
1719 else:
1720 # TODO(unknown): unexpected #else, issue warning?
1721 pass
1722 elif Match(r'^\s*#\s*endif\b', line):
1723 # End of #if or #else blocks.
1724 if self.pp_stack:
1725 # If we saw an #else, we will need to restore the nesting
1726 # stack to its former state before the #else, otherwise we
1727 # will just continue from where we left off.
1728 if self.pp_stack[-1].seen_else:
1729 # Here we can just use a shallow copy since we are the last
1730 # reference to it.
1731 self.stack = self.pp_stack[-1].stack_before_else
1732 # Drop the corresponding #if
1733 self.pp_stack.pop()
1734 else:
1735 # TODO(unknown): unexpected #endif, issue warning?
1736 pass
1737
1738 def Update(self, filename, clean_lines, linenum, error):
1739 """Update nesting state with current line.
1740
1741 Args:
1742 filename: The name of the current file.
1743 clean_lines: A CleansedLines instance containing the file.
1744 linenum: The number of the line to check.
1745 error: The function to call with any errors found.
1746 """
1747 line = clean_lines.elided[linenum]
1748
1749 # Update pp_stack first
1750 self.UpdatePreprocessor(line)
1751
1752 # Count parentheses. This is to avoid adding struct arguments to
1753 # the nesting stack.
1754 if self.stack:
1755 inner_block = self.stack[-1]
1756 depth_change = line.count('(') - line.count(')')
1757 inner_block.open_parentheses += depth_change
1758
1759 # Also check if we are starting or ending an inline assembly block.
1760 if inner_block.inline_asm in (_NO_ASM, _END_ASM):
1761 if (depth_change != 0 and
1762 inner_block.open_parentheses == 1 and
1763 _MATCH_ASM.match(line)):
1764 # Enter assembly block
1765 inner_block.inline_asm = _INSIDE_ASM
1766 else:
1767 # Not entering assembly block. If previous line was _END_ASM,
1768 # we will now shift to _NO_ASM state.
1769 inner_block.inline_asm = _NO_ASM
1770 elif (inner_block.inline_asm == _INSIDE_ASM and
1771 inner_block.open_parentheses == 0):
1772 # Exit assembly block
1773 inner_block.inline_asm = _END_ASM
1774
1775 # Consume namespace declaration at the beginning of the line. Do
1776 # this in a loop so that we catch same line declarations like this:
1777 # namespace proto2 { namespace bridge { class MessageSet; } }
1778 while True:
1779 # Match start of namespace. The "\b\s*" below catches namespace
1780 # declarations even if it weren't followed by a whitespace, this
1781 # is so that we don't confuse our namespace checker. The
1782 # missing spaces will be flagged by CheckSpacing.
1783 namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
1784 if not namespace_decl_match:
1785 break
1786
1787 new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
1788 self.stack.append(new_namespace)
1789
1790 line = namespace_decl_match.group(2)
1791 if line.find('{') != -1:
1792 new_namespace.seen_open_brace = True
1793 line = line[line.find('{') + 1:]
1794
1795 # Look for a class declaration in whatever is left of the line
1796 # after parsing namespaces. The regexp accounts for decorated classes
1797 # such as in:
1798 # class LOCKABLE API Object {
1799 # };
1800 #
1801 # Templates with class arguments may confuse the parser, for example:
1802 # template <class T
1803 # class Comparator = less<T>,
1804 # class Vector = vector<T> >
1805 # class HeapQueue {
1806 #
1807 # Because this parser has no nesting state about templates, by the
1808 # time it saw "class Comparator", it may think that it's a new class.
1809 # Nested templates have a similar problem:
1810 # template <
1811 # typename ExportedType,
1812 # typename TupleType,
1813 # template <typename, typename> class ImplTemplate>
1814 #
1815 # To avoid these cases, we ignore classes that are followed by '=' or '>'
1816 class_decl_match = Match(
1817 r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
erg@google.comfd5da632013-10-25 17:39:45 +00001818 r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
1819 r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
erg@google.comd350fe52013-01-14 17:51:48 +00001820 if (class_decl_match and
1821 (not self.stack or self.stack[-1].open_parentheses == 0)):
1822 self.stack.append(_ClassInfo(
1823 class_decl_match.group(4), class_decl_match.group(2),
1824 clean_lines, linenum))
1825 line = class_decl_match.group(5)
1826
1827 # If we have not yet seen the opening brace for the innermost block,
1828 # run checks here.
1829 if not self.SeenOpenBrace():
1830 self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
1831
1832 # Update access control if we are inside a class/struct
1833 if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.comfd5da632013-10-25 17:39:45 +00001834 classinfo = self.stack[-1]
1835 access_match = Match(
1836 r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
1837 r':(?:[^:]|$)',
1838 line)
erg@google.comd350fe52013-01-14 17:51:48 +00001839 if access_match:
erg@google.comfd5da632013-10-25 17:39:45 +00001840 classinfo.access = access_match.group(2)
1841
1842 # Check that access keywords are indented +1 space. Skip this
1843 # check if the keywords are not preceded by whitespaces, examples:
1844 # http://go/cfudb + http://go/vxnkk
1845 indent = access_match.group(1)
1846 if (len(indent) != classinfo.class_indent + 1 and
1847 Match(r'^\s*$', indent)):
1848 if classinfo.is_struct:
1849 parent = 'struct ' + classinfo.name
1850 else:
1851 parent = 'class ' + classinfo.name
1852 slots = ''
1853 if access_match.group(3):
1854 slots = access_match.group(3)
1855 error(filename, linenum, 'whitespace/indent', 3,
1856 '%s%s: should be indented +1 space inside %s' % (
1857 access_match.group(2), slots, parent))
erg@google.comd350fe52013-01-14 17:51:48 +00001858
1859 # Consume braces or semicolons from what's left of the line
1860 while True:
1861 # Match first brace, semicolon, or closed parenthesis.
1862 matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
1863 if not matched:
1864 break
1865
1866 token = matched.group(1)
1867 if token == '{':
1868 # If namespace or class hasn't seen a opening brace yet, mark
1869 # namespace/class head as complete. Push a new block onto the
1870 # stack otherwise.
1871 if not self.SeenOpenBrace():
1872 self.stack[-1].seen_open_brace = True
1873 else:
1874 self.stack.append(_BlockInfo(True))
1875 if _MATCH_ASM.match(line):
1876 self.stack[-1].inline_asm = _BLOCK_ASM
1877 elif token == ';' or token == ')':
1878 # If we haven't seen an opening brace yet, but we already saw
1879 # a semicolon, this is probably a forward declaration. Pop
1880 # the stack for these.
1881 #
1882 # Similarly, if we haven't seen an opening brace yet, but we
1883 # already saw a closing parenthesis, then these are probably
1884 # function arguments with extra "class" or "struct" keywords.
1885 # Also pop these stack for these.
1886 if not self.SeenOpenBrace():
1887 self.stack.pop()
1888 else: # token == '}'
1889 # Perform end of block checks and pop the stack.
1890 if self.stack:
1891 self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
1892 self.stack.pop()
1893 line = matched.group(2)
1894
1895 def InnermostClass(self):
1896 """Get class info on the top of the stack.
1897
1898 Returns:
1899 A _ClassInfo object if we are inside a class, or None otherwise.
1900 """
1901 for i in range(len(self.stack), 0, -1):
1902 classinfo = self.stack[i - 1]
1903 if isinstance(classinfo, _ClassInfo):
1904 return classinfo
1905 return None
1906
1907 def CheckClassFinished(self, filename, error):
erg@google.com4e00b9a2009-01-12 23:05:11 +00001908 """Checks that all classes have been completely parsed.
1909
1910 Call this when all lines in a file have been processed.
1911 Args:
1912 filename: The name of the current file.
1913 error: The function to call with any errors found.
1914 """
erg@google.comd350fe52013-01-14 17:51:48 +00001915 # Note: This test can result in false positives if #ifdef constructs
1916 # get in the way of brace matching. See the testBuildClass test in
1917 # cpplint_unittest.py for an example of this.
1918 for obj in self.stack:
1919 if isinstance(obj, _ClassInfo):
1920 error(filename, obj.starting_linenum, 'build/class', 5,
1921 'Failed to find complete declaration of class %s' %
1922 obj.name)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001923
1924
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  gcc-2 accepted several constructs that are not standard C++.  Flagging
  them here eases the move to newer compilers:
  - storage class not written first (e.g. "const static" instead of
    "static const").
  - the 'q' length modifier in printf-type format strings.
  - positional "%N$" formats in printf-type functions.
  - a backslash followed by %, [, ( or { inside a string, which is an
    undefined character escape sequence.
  - text after #endif that is not a comment.
  - inner-style forward declarations.
  - the >? and <? operators, and their >?= and <?= cousins.

  Because it is convenient to do so in the same pass, this function also
  flags "const string&" members and single-argument constructors that are
  not marked explicit.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # First pass: comments removed, string literals still present.
  text = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', text):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', text):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Drop escaped backslashes so that they are not mistaken for the start of
  # an undefined escape.
  text = text.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', text):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Second pass: both comments and strings removed.  Each entry is
  # (matcher, pattern, category, confidence, message); entries are applied
  # in order so that errors are reported in a stable sequence.
  code = clean_lines.elided[linenum]
  elided_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard. Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid. Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      # TODO(unknown): Could it be expanded safely to arbitrary references,
      # without triggering too many false positives? The first
      # attempt triggered 5 warnings for mostly benign code in the regtest,
      # hence the restriction.
      # Here's the original regexp, for the reference:
      # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
      # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
  )
  for matcher, pattern, category, confidence, message in elided_checks:
    if matcher(pattern, code):
      error(filename, linenum, category, confidence, message)

  # What follows only applies inside a class whose opening brace has already
  # been seen.
  owner = nesting_state.InnermostClass()
  if not (owner and owner.seen_open_brace):
    return

  # The class may carry namespace or outer-class qualifiers; constructors
  # are written with the last component only.
  ctor_name = re.escape(owner.name.split('::')[-1])

  # Single-argument constructors that aren't marked explicit are valid C++
  # but against style.
  ctor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
               % ctor_name,
               code)
  if not ctor:
    return

  sole_argument = ctor.group(1)
  if sole_argument == 'void':
    return

  # A (const) reference to the class itself is exempt.
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
           % ctor_name, sole_argument.strip()):
    return

  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2030
erg@google.com4e00b9a2009-01-12 23:05:11 +00002031
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls frequently sit inside the condition of an
  # if/for/while/switch, which has its own, more liberal spacing rules.
  # When the line is such a statement, only the text between its parentheses
  # is held to the stricter function-call rules; otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    condition = Search(control_pattern, line)
    if condition:
      fncall = condition.group(1)
      break

  # Outside if/for/while/switch there should never be a space immediately
  # inside parens (eg "f( 3, 4 )"), with an exception for nested parens
  # ( (a+b) + c ).  Likewise there should be no space before a ( that starts
  # a function's arguments; it is treated as such when the character before
  # the whitespace is legal in a function name (alnum + _) and no macro is
  # being started.  Pointers and references to arrays and functions are too
  # tricky and are skipped; they are recognized very simply as:
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # The contents of [] are assumed short enough never to need wrapping.
  skip_patterns = (
      # Control structures.
      r'\b(if|for|while|switch|return|delete|catch)\b',
      # Pointers/references to functions.
      r' \([^)]+\)\([^)]*(\)|,$)',
      # Pointers/references to arrays.
      r' \([^)]+\)\[[^\]]+\]',
  )
  for skip_pattern in skip_patterns:
    if Search(skip_pattern, fncall):
      return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  space_before_paren = (
      Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall) and
      not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall))
  if space_before_paren:
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # A ) followed only by a newline, or by { and a newline, is assumed to
  # belong to a control statement (if/while/etc) and is not reported.
  if not Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    return

  # When nothing but whitespace precedes the closing parenthesis, a more
  # descriptive message is used.
  if Search(r'^\s+\)', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Closing ) should be moved to the previous line')
  else:
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002097
2098
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  is made up of whitespace characters only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2112
2113
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.
  NOTE(review): the NOLINT handling is not implemented in this function;
  presumably the error callback suppresses the report -- confirm.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # range() rather than xrange(): xrange does not exist on Python 3, and
    # range is what the rest of this file already uses for line scans.
    for start_linenum in range(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      # Accumulate the (possibly multi-line) signature so that TEST macro
      # arguments can be recovered below.
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2182
2183
# Matches a comment that starts with a TODO.  Groups: (1) whitespace between
# the '//' and 'TODO', (2) the optional parenthesized username, (3) the
# optional single whitespace character (or end of line) after the TODO head.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Comments that do not start with a TODO are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, spaces_after = todo.groups()

  # Exactly one space is correct; the zero-space case is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Only a single space or the end of the comment may follow.  The group is
  # None when some other character follows directly, which is also reported.
  if spaces_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2215
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # The elided line has comments and strings removed.
  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'),
                clean_lines.elided[linenum])
  if not macro:
    return

  blocks = nesting_state.stack
  innermost = blocks[-1] if blocks else None
  if not isinstance(innermost, _ClassInfo):
    # The macro appears outside a class declaration, or perhaps inside a
    # function when it should have been part of the class declaration.  A
    # warning could be issued here, but this probably resulted in a compiler
    # error already.
    return

  if innermost.access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2245
2246
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Find the corresponding > to close a template.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if a matching bracket exists.
  """
  openers = ('<', '(', '[')
  pending = ['<']          # Brackets opened so far and not yet closed.
  remainder = init_suffix
  while True:
    # Locate the next operator that can tell whether the < opened a bracket
    # or was a less-than; only the latter should end up being warned about.
    #
    # Other operators are deliberately not used to stop early: in something
    # like "a<b+c" the "<" is most likely a less-than, but stopping there
    # would give false positives for default arguments and other template
    # expressions.
    found = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', remainder)
    if not found:
      # Nothing of interest left on this line; continue with the next one.
      linenum += 1
      if linenum >= len(clean_lines.elided):
        # All remaining lines are exhausted without a matching bracket.
        # Most likely the input was incomplete, otherwise a semicolon would
        # have been seen and ended the search earlier.
        return True
      remainder = clean_lines.elided[linenum]
      continue

    symbol = found.group(1)
    remainder = found.group(2)

    # An opening bracket nests one level deeper whatever is currently open.
    if symbol in openers:
      pending.append(symbol)
      continue

    if pending[-1] != '<':
      # Inside ( or [: only a closing ) or ] matters.  No attempt is made to
      # pair them up exactly; (] or [) would have been a syntax error anyway.
      if symbol in (')', ']'):
        pending.pop()
      continue

    # The innermost open bracket is an angle bracket.
    if symbol == '>':
      pending.pop()
      if not pending:
        # This > closes the initial <.
        return True
    elif symbol == ',':
      # A comma inside the bracket most likely separates template
      # arguments.  The closing > has not been seen yet but is probably a
      # few lines further on, so stop here.
      return True
    else:
      # Any other operator: the < was not opening a template.
      return False
2315
2316
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Decide whether a '>' looks like the end of a template argument list.

  Scans backward from just before the '>', continuing onto earlier lines,
  and tracks nesting of <>, () and [] until a verdict can be reached.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if the matching '<' is found, or if a comma is seen while a '<'
    is still expected.  False if '(', '[' or ';' is seen while a '<' is
    still expected, or if the start of the file is reached first.
  """
  closers = ('>', ')', ']')
  remaining = init_prefix
  pending = ['>']  # Innermost unopened bracket is last.
  while True:
    # Find the last bracket/separator token in what is left of the line.
    found = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', remaining)
    if not found:
      # Nothing of interest left on this line; move to the previous one.
      linenum -= 1
      if linenum < 0:
        # Exhausted all earlier lines without finding the opening bracket.
        return False
      remaining = clean_lines.elided[linenum]
      continue

    token = found.group(2)
    remaining = found.group(1)

    if token in closers:
      # Walking backwards, a closing bracket starts a nested group.
      pending.append(token)
      continue

    if pending[-1] == '>':
      # Currently expecting an opening angle bracket.
      if token == '<':
        pending.pop()
        if not pending:
          # Reached the bracket that opens the one we started from.
          return True
      elif token == ',':
        # A comma before the bracket is most likely separating template
        # arguments; the opening bracket is probably further back.
        return True
      else:
        # '(', '[' or ';' while expecting '<': not a template.
        return False
    elif token in ('(', '['):
      # Inside () or []; this closes the innermost group.
      pending.pop()
2372
2373
def _CheckBlankLineSpacing(filename, clean_lines, linenum, error):
  """Reports needless blank lines around blocks and after access labels.

  Must only be called when line `linenum` is blank.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the (blank) line to check.
    error: The function to call with any errors found.
  """
  raw = clean_lines.raw_lines
  elided = clean_lines.elided
  prev_line = elided[linenum - 1]
  prevbrace = prev_line.rfind('{')
  # TODO(unknown): Don't complain if line before blank line, and line after,
  #                both start with alnums and are indented the same amount.
  #                This ignores whitespace at the start of a namespace block
  #                because those are not usually indented.
  if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
    # OK, we have a blank line at the start of a code block.  Before we
    # complain, we check if it is an exception to the rule: the previous
    # non-empty line has the parameters of a function header that are
    # indented 4 spaces (because they did not fit in a 80 column line when
    # placed on the same line as the function name).  We also check for the
    # case where the previous line is indented 6 spaces, which may happen
    # when the initializers of a constructor do not fit into a 80 column
    # line.
    if Match(r' {6}\w', prev_line):  # Initializer list?
      # Walk up to the line that opens the initializer list.  Its colon
      # should be indented 4 spaces to cause 6 space indentation afterwards.
      search_position = linenum - 2
      while (search_position >= 0
             and Match(r' {6}\w', elided[search_position])):
        search_position -= 1
      # Four spaces followed by the colon: exactly the 5 characters sliced.
      exception = (search_position >= 0
                   and elided[search_position][:5] == '    :')
    else:
      # Search for the function arguments or an initializer list.  Simple
      # heuristic: if the line is indented 4 spaces and has a closing paren
      # without the opening paren, followed by an opening brace or colon
      # (for initializer lists), assume it is the last line of a function
      # header.  A colon indented 4 spaces is an initializer list.
      exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                         prev_line)
                   or Match(r' {4}:', prev_line))

    if not exception:
      error(filename, linenum, 'whitespace/blank_line', 2,
            'Blank line at the start of a code block. Is this needed?')

  # Ignore blank lines at the end of a block in a long if-else
  # chain, like this:
  #   if (condition1) {
  #     // Something followed by a blank line
  #
  #   } else if (condition2) {
  #     // Something else
  #   }
  if linenum + 1 < clean_lines.NumLines():
    next_line = raw[linenum + 1]
    if (next_line
        and Match(r'\s*}', next_line)
        and next_line.find('} else ') == -1):
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Blank line at the end of a code block. Is this needed?')

  matched = Match(r'\s*(public|protected|private):', prev_line)
  if matched:
    error(filename, linenum, 'whitespace/blank_line', 3,
          'Do not leave a blank line after "%s:"' % matched.group(1))


def _CheckCommentSpacing(filename, line, linenum, error):
  """Checks spacing before and just after a // comment on a raw line.

  Args:
    filename: The name of the current file.
    line: The raw (comments and strings intact) text of the line.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  commentpos = line.find('//')
  if commentpos == -1:
    return
  # Check if the // may be in quotes.  If so, ignore it: an odd number of
  # unescaped double quotes before it means we are inside a string.
  if (line.count('"', 0, commentpos) -
      line.count('\\"', 0, commentpos)) % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise:
  if (not Match(r'^\s*{ //', line) and
      ((commentpos >= 1 and
        line[commentpos-1] not in string.whitespace) or
       (commentpos >= 2 and
        line[commentpos-2] not in string.whitespace))):
    error(filename, linenum, 'whitespace/comments', 2,
          'At least two spaces is best between code and comments')

  # There should always be a space between the // and the comment
  commentend = commentpos + 2
  if commentend < len(line) and not line[commentend] == ' ':
    # but some lines are exceptions -- e.g. if they're big
    # comment delimiters like:
    # //----------------------------------------------------------
    # or are an empty C++ style Doxygen comment, like:
    # ///
    # or they begin with multiple slashes followed by a space:
    # //////// Header comment
    match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
             Search(r'^/$', line[commentend:]) or
             Search(r'^/+ ', line[commentend:]))
    if not match:
      error(filename, linenum, 'whitespace/comments', 4,
            'Should have a space between // and comment')
  CheckComment(line[commentpos:], filename, linenum, error)


def _CheckOperatorSpacing(filename, clean_lines, line, linenum, error):
  """Checks spacing around assignment, comparison, shift and unary operators.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    line: The elided line, with operator-method names already neutralized.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum,
                                         match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))


def _CheckParenSpacing(filename, line, linenum, error):
  """Checks spacing before and inside the parens of if/for/while/switch.

  Args:
    filename: The name of the current file.
    line: The elided text of the line.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want a different number of spaces on each side, nor more
  # than one space on either side.
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))


def _CheckBraceAndSemicolonSpacing(filename, line, linenum, error):
  """Checks spacing near braces, brackets, trailing ';' and range-for ':'.

  Args:
    filename: The name of the current file.
    line: The elided text of the line.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  if Search(r'[^ ({]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')


def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  The individual checks live in private helpers and run in a fixed order,
  so errors for one line are always reported in the same sequence.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  raw_line = clean_lines.raw_lines[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(raw_line) and not nesting_state.InNamespaceBody():
    _CheckBlankLineSpacing(filename, clean_lines, linenum, error)

  # Next, we complain if there's a comment too near the text
  _CheckCommentSpacing(filename, raw_line, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods: rewrite e.g.
  # "operator==(" so the operator characters are gone from the line.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  _CheckOperatorSpacing(filename, clean_lines, line, linenum, error)
  _CheckParenSpacing(filename, line, linenum, error)

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  _CheckBraceAndSemicolonSpacing(filename, line, linenum, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002662
2663
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is that public/protected/private
  labels in a large class are preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  first_line = class_info.starting_linenum

  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  class_is_small = class_info.last_line - first_line <= 24
  if class_is_small or linenum <= first_line:
    return

  section = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not section:
    return

  # Complain if the line before public/protected/private was not a blank
  # line, but not when that line contains "class" or "struct".  This can
  # happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # Also ignore a previous line ending with a backslash, as can be common
  # when defining classes in C macros.
  preceding = clean_lines.lines[linenum - 1]
  if IsBlankLine(preceding):
    return
  if Search(r'\b(class|struct)\b', preceding):
    return
  if Search(r'\\$', preceding):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  head_end = first_line
  for candidate in range(first_line, linenum):
    if Search(r'\{\s*$', clean_lines.lines[candidate]):
      head_end = candidate
      break

  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % section.group(1))
2717
2718
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple for the closest elided line above
    `linenum` that is not blank.  When every earlier line is blank (or
    there is none), the tuple is ('', -1).
  """
  # Walk upward from the line just above linenum to the top of the file.
  for index in range(linenum - 1, -1, -1):
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  return ('', -1)
2740
2741
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Also checks placement of else and do/while relative to braces, and
  flags a redundant ';' after a closing brace in common cases.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}', or if the previous
    # line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[;:}{]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  # The \b keeps identifiers that merely start with "else" (e.g. a call to
  # elsewhere()) from being mistaken for the keyword.
  if Match(r'\s*else\b', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  #
  # While the line is just an indented "{...};", prepend earlier non-blank
  # lines that contain no ';' so the keyword check below can see them.
  prevlinenum = linenum
  while Match(r'\s+{.*}\s*;', line):
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # Stop at the top of the file: there GetPreviousNonBlankLine keeps
    # returning ('', -1), which would otherwise never change `line` and
    # loop forever.
    if prevlinenum < 0 or prevline.count(';'):
      break
    line = prevline + line
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|union|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
2816
2817
def CheckEmptyLoopBody(filename, clean_lines, linenum, error):
  """Look for a loop whose body is only a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Only consider loop keywords at the beginning of the line.  Because only
  # whitespace is allowed before the keyword, this also ignores most
  # do-while loops, since those lines should start with a closing brace.
  if not Match(r'\s*(for|while)\s*\(', line):
    return

  # Find the end of the conditional expression.
  closing = CloseExpression(clean_lines, linenum, line.find('('))
  (end_line, end_linenum, end_pos) = closing
  if end_pos < 0:
    return

  # Warn only if a semicolon directly follows the condition.  Whitespace or
  # a newline before the semicolon is left to the separate check for
  # semicolons preceded by whitespace.
  if Match(r';', end_line[end_pos:]):
    error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
          'Empty loop bodies should use {} or continue')
2843
2844
def ReplaceableCheck(operator, macro, line):
  """Determine whether a basic CHECK can be replaced with a more specific one.

  For example suggest using CHECK_EQ instead of CHECK(a == b) and
  similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """
  # Decimal and hex integers, strings, and chars (in that order).
  constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Require something that looks like a literal on one side of the
  # operator, since CHECK(x == iterator) won't compile.  This means we
  # can't catch all the cases where a more specific CHECK is possible,
  # but it's less annoying than dealing with extraneous warnings.
  literal_on_left = r'\s*' + constant + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + constant + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  candidate = Match(pattern, line)

  # Don't complain about CHECK(x == NULL) or similar because
  # CHECK_EQ(x, NULL) won't compile (requires a cast).
  # Also, don't complain about more complex boolean expressions
  # involving && or || such as CHECK(a == b || c == d).
  return candidate and not Search(r'NULL|&&|\|\|', line)
2878
2879
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  Suggests the comparison-specific macro (CHECK_EQ, CHECK_NE, ...) when a
  plain macro call wraps a simple comparison.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Decide the set of replacement macros that should be suggested: the
  # first known macro whose name appears on the raw line wins.
  raw_lines = clean_lines.raw_lines
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_lines[linenum]:
      current_macro = candidate
      break
  if not current_macro:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Two-char
  # operators are tried before their one-char prefixes, and only the first
  # hit is reported.
  for operator in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(operator, current_macro, line):
      continue
    replacement = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              replacement, current_macro, operator))
    break
2911
2912
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives.  At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for found in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = found.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
2943
2944
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For non-Unicode (byte)
    strings this is simply len(line).
  """
  # The `unicode` builtin only exists on Python 2.  On Python 3 every str
  # is already Unicode text, so fall back to str instead of raising
  # NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that base + combining sequences which have a
  # precomposed form are counted as one character.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters take two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining marks that remain after normalization take no column.
      width += 1
  return width
2965
2966
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Whitespace and length checks look at the raw (uncleansed) line.
  raw_line = clean_lines.raw_lines[linenum]

  if '\t' in raw_line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  elided_line = clean_lines.elided[linenum]
  # Number of leading ' ' characters (tabs are not counted).
  leading_spaces = len(raw_line) - len(raw_line.lstrip(' '))
  if raw_line and raw_line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section
  # labels ("word:" alone on the line).
  elif (leading_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', elided_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Work out whether this line is part of the file's header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    guard_prefixes = ('#ifndef %s' % cppvar,
                      '#define %s' % cppvar,
                      '#endif // %s' % cppvar)
    is_header_guard = raw_line.startswith(guard_prefixes)

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  length_exempt = (
      raw_line.startswith('#include') or is_header_guard or
      Match(r'^\s*//.*http(s?)://\S*$', raw_line) or
      Match(r'^// \$Id:.*#[0-9]+ \$$', raw_line))
  if not length_exempt:
    width = GetLineWidth(raw_line)
    if width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' on a line that mentions no 'for' is a candidate for
  # "several commands on one line".
  if elided_line.count(';') > 1 and 'for' not in elided_line:
    previous_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # for loops are allowed two ;'s (and may run over two lines).
    continues_for_loop = 'for' in previous_line and ';' not in previous_line
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in elided_line or
                      'default:' in elided_line) and
                     'break;' in elided_line)
    if not continues_for_loop and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyLoopBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  innermost_class = nesting_state.InnermostClass()
  if innermost_class:
    CheckSectionSpacing(filename, clean_lines, innermost_class, linenum,
                        error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003067
3068
# Matches an '#include "name.h"' whose quoted path contains no '/'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter
# ('<' or '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the leading component of a filename, i.e. everything before the
# first '-', '_' or '.'.  For instance:
#   _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3077
3078
def _DropCommonSuffixes(filename):
  """Strips a well-known trailing suffix such as _test.cc or -inl.h.

  When no well-known suffix applies, only the file extension is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  droppable_endings = ('test.cc', 'regtest.cc', 'unittest.cc',
                       'inl.h', 'impl.h', 'internal.h')
  for ending in droppable_endings:
    # An ending only counts when a '-' or '_' joins it to the rest of the
    # name; that separator is dropped together with the ending.
    if filename.endswith(('-' + ending, '_' + ending)):
      return filename[:-(len(ending) + 1)]
  root, _ = os.path.splitext(filename)
  return root
3104
3105
def _IsTestFilename(filename):
  """Tells whether the filename carries one of the test-file suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  return filename.endswith(('_test.cc', '_unittest.cc', '_regtest.cc'))
3121
3122
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which category of header 'include' belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ ones when listed in
  # _CPP_HEADERS, C ones otherwise.
  if is_system:
    if include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  # The sibling '../public' directory is accepted as well.
  stripped_target = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(stripped_target)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  owning_dirs = (target_dir, os.path.normpath(target_dir + '/../public'))
  if target_base == include_base and include_dir in owning_dirs:
    return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3180
3181
erg@google.coma87abb82009-02-24 01:41:01 +00003182
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below needs the parsed include path.  The pattern is
  # anchored at the start of the line, so one search serves all the checks.
  parsed = _RE_PATTERN_INCLUDE.search(line)
  if not parsed:
    return
  include = parsed.group(2)
  is_system = (parsed.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    header_type = _ClassifyInclude(fileinfo, include, is_system)
    order_problem = include_state.CheckNextIncludeOrder(header_type)
    if order_problem:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (order_problem, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    if not include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so we exempt them.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3253
erg@google.com8a95ecc2011-09-08 00:45:54 +00003254
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the
  text following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly
  handles nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match string having an open punctuation symbol at the
  end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
          It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.
  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found.

  Raises:
    AssertionError: If the match of start_pattern is empty or does not end
                    with one of '(', '{' or '['.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # values() works on both Python 2 and Python 3; itervalues() exists only
  # on Python 2.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    char = text[position]
    if char == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif char in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif char in matching_punctuation:
      punctuation_stack.append(matching_punctuation[char])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # punctuations match.  position is one past the final closing symbol, so
  # the slice stops just before it.
  return text[start_position:position - 1]
3309
3310
# Building blocks for recognizing call-by-reference parameters.
#
# An identifier: a letter or underscore followed by word characters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
# A type name: optional 'const', optional elaborated-type keyword, a
# possibly qualified name, and optional template arguments.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'[\w:]*\w(?:\s*<[\w:*, ]*>(?:::\w+)?)?')
# A call-by-reference parameter ends with '& identifier'.  Group 1 is the
# parameter text; an optional default value and the ',' or ')' that
# terminates the parameter are matched but left out of the group.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*&\s*' +
    _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + r'|const\s+' +
    _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
3325
3326
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  #include lines are handed to CheckIncludeLine and get none of the other
  checks.  Every other non-empty elided line is run through a series of
  independent regular-expression checks, each reporting through 'error'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  if _RE_PATTERN_INCLUDE.search(line):
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in function parameters.  A single '&' may
  # found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  # TODO(unknown): Doesn't account for line breaks within declarators.
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
        isinstance(nesting_state.stack[-1], _NamespaceInfo)):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], _ClassInfo) or
        isinstance(nesting_state.stack[-2], _NamespaceInfo)):
      check_params = True  # just opened global/class/namespace block
  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional '<...>' after swap accepts explicit template arguments made
  # of word characters and ':' (e.g. swap<ns::Type>).
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case
    # by looking at up to two preceding lines.
    for i in range(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' + parameter)

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Match(r'^\s*MockCallback<.*>', line))):
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines (for example http://go/hrfhr ), so we only need
      # to check the previous line for MOCK_METHOD.
      if (linenum == 0 or
          not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(\S+,\s*$',
                    clean_lines.elided[linenum - 1])):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style. '
              'Use static_cast<%s>(...) instead' %
              match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).  Only when that check finds nothing do we look for the general
  # pointer cast.
  if not CheckCStyleCast(filename, linenum, line,
                         clean_lines.raw_lines[linenum],
                         'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # A member (trailing-underscore name) initialized with itself: foo_(foo_).
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented here yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    # A single expression as the whole argument list means it is being used
    # as the format string.
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # NOTE(review): the alternation below is not grouped, so '^' binds only to
  # the first branch and '$' only to the last; kept as-is to preserve which
  # lines are reported.
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
3646
3647
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports a C-style cast, or an unnamed function parameter, on one line.

  The pattern is searched for in the cleansed line. A hit that directly
  follows sizeof, ' operator++' or ' operator--' is ignored. A hit whose
  trailing text looks like the rest of a function declaration is treated as
  an unnamed parameter rather than a cast.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The name of the C++ cast to recommend in the message, e.g.
               reinterpret_cast, static_cast or const_cast.
    pattern: The regular expression used to find C-style casts. Its first
             group is reported as the target type of the cast.
    error: The function to call with any errors found.

  Returns:
    False if the pattern did not match, or if the match was dismissed as a
    sizeof / operator++ / operator-- look-alike. True otherwise, i.e.
    whenever the match was classified as a cast or as a function
    declaration, including the case where the declaration warning itself
    was suppressed.
  """
  cast = Search(pattern, line)
  if not cast:
    return False

  # Everything that precedes the character just before group 1 (presumably
  # the opening parenthesis of the suspected cast -- depends on the pattern).
  leading = line[0:cast.start(1) - 1]

  # sizeof(type) looks exactly like a cast but is not one.
  if Match(r'.*sizeof\s*$', leading):
    return False

  # operator++(int) and operator--(int) take a dummy, unnamed int.
  if leading.endswith(' operator++') or leading.endswith(' operator--'):
    return False

  trailing = line[cast.end(0):]

  # What follows the match decides whether this is really a declaration:
  #   ')'  a function pointer passed as an argument: void foo(void (*bar)(int));
  #   ';'  a plain declaration or function pointer typedef:
  #        void foo(int); or void foo(int) const;
  #   '='  a function pointer assignment: void *(*foo)(int) = ...
  #   '>'  MockCallback<...> ...
  #
  # Only a single, unnamed argument is recognised this way; declarations
  # with several arguments of which some are unnamed are not handled.
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))', trailing)
  if declaration:
    terminator = declaration.group(3)
    # A MockCallback<...> use or a /* ... */ comment on the raw line
    # silences the warning, except for the ')' / '=' / ';' cases.
    excused = 'MockCallback<' in raw_line or '/*' in raw_line
    if not terminator or terminator == ';' or not excused:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return True

  # Anything that survives to this point is treated as a real cast.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast.group(1)))

  return True
3710
erg@google.com4e00b9a2009-01-12 23:05:11 +00003711
# Pairs of (STL header, names of the templates that header provides).
# Every name listed here is turned into a "name<" pattern further down.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Finds the bare word 'string'.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, template name, header) triples for <algorithm>
# functions. Each pattern accepts max<type>(..., ...) and max(..., ...),
# but not foo->max, foo.max or type::max().
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# (compiled pattern, 'name<>', header) triples, one per template listed in
# _HEADERS_CONTAINING_TEMPLATES, in table order.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
3768
3769
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a header are part of one 'module'.

  Two files form a module when, after normalisation, the path of the .cc
  file ends with the path of the header. Normalisation drops the '.cc' /
  '.h' extension, one trailing '_unittest' or '_test' on the .cc side, a
  trailing '-inl' on the header side, and folds '/public/' and '/internal/'
  path components into '/'. So foo.h, foo-inl.h, foo.cc, foo_test.cc and
  foo_unittest.cc in one directory belong together, and so do
  some/path/public/xyzzy and some/path/internal/xyzzy.

  When filename_cc carries a longer path than filename_h (for example
  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h'), the extra
  leading part is returned as well, so the caller can use it as a prefix
  when trying to open the header. The real include paths are not known
  here, hence this guesswork.

  Known bugs: the comparison is a plain suffix test, so tools/base/bar.cc
  and base/bar.h are reported as the same module. Such false positives
  should be rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  cc_ext = '.cc'
  if not filename_cc.endswith(cc_ext):
    return (False, '')
  source_stem = filename_cc[:-len(cc_ext)]
  # Only one test suffix is removed, '_unittest' taking precedence.
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break
  source_stem = source_stem.replace('/public/', '/')
  source_stem = source_stem.replace('/internal/', '/')

  h_ext = '.h'
  if not filename_h.endswith(h_ext):
    return (False, '')
  header_stem = filename_h[:-len(h_ext)]
  inline_suffix = '-inl'
  if header_stem.endswith(inline_suffix):
    header_stem = header_stem[:-len(inline_suffix)]
  header_stem = header_stem.replace('/public/', '/')
  header_stem = header_stem.replace('/internal/', '/')

  if not source_stem.endswith(header_stem):
    return False, ''
  # Whatever precedes the shared tail is the prefix needed for the header.
  return True, source_stem[:-len(header_stem)]
3823
3824
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the file is stripped of comments and searched with
  _RE_PATTERN_INCLUDE; each include found is recorded in include_state
  unless it is already present.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False

  try:
    for index, line in enumerate(headerfile):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, index + 1))
  finally:
    # Release the file handle even if scanning fails. An injected io
    # factory may hand back a plain iterable of lines, which has nothing
    # to close.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
3852
3853
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Warns about STL headers that are used but not included.

  The elided lines are scanned for uses of string, of a few <algorithm>
  functions and of the templates listed in _HEADERS_CONTAINING_TEMPLATES.
  Each header needed that way is reported unless it is included by this
  file or by a header of the same module. Only one reason is given per
  header: if both equal_to<> and less<> are used, only the one that appears
  later in the file is named as the reason to include <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Header name -> (line number, entity that requires it), for example
  # { '<functional>': (1219, 'less<>') }. A later use replaces an earlier
  # one for the same header.
  needed = {}

  for linenum in xrange(clean_lines.NumLines()):
    code = clean_lines.elided[linenum]
    # Skip empty lines and lines that start with '#'.
    if not code or code[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    string_use = _RE_PATTERN_STRING.search(code)
    if string_use:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      before = code[:string_use.start()]
      foreign_namespace = (before.endswith('::') and
                           not before.endswith('std::'))
      if not foreign_namespace:
        needed['<string>'] = (linenum, 'string')

    for regex, entity, header in _re_pattern_algorithm_header:
      if regex.search(code):
        needed[header] = (linenum, entity)

    # Pure speed-up: the template patterns all require a '<'.
    if '<' in code:
      for regex, entity, header in _re_pattern_templates:
        if regex.search(code):
          needed[header] = (linenum, entity)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Work on a copy so the additions made below
  # stay local to this function.
  include_state = include_state.copy()

  # Set once a header of this file's own module has been loaded.
  found_own_header = False

  # Use the absolute path so that matching works properly.
  #
  # Emacs's flymake runs cpplint on a temporary copy whose name might end
  # with '_flymake.cc'. Map that back to the original name so that for
  # 'foo_flymake.cc' we look for 'foo.h' rather than 'foo_flymake.h'.
  source_path = re.sub(r'_flymake\.cc$', '.cc',
                       FileInfo(filename).FullName())

  # include_state is modified during iteration, so we iterate over a copy of
  # the keys.
  for included in include_state.keys():
    same_module, prefix = FilesBelongToSameModule(source_path, included)
    if not same_module:
      continue
    if UpdateIncludeState(prefix + included, include_state, io):
      found_own_header = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not found_own_header:
    return

  # All the lines have been processed, report the errors found.
  for wanted_header, (at_line, entity) in needed.items():
    if wanted_header.strip('<>"') in include_state:
      continue
    error(filename, at_line,
          'build/include_what_you_use', 4,
          'Add #include ' + wanted_header + ' for ' + entity)
3945
3946
# Finds make_pair followed by an explicit template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Reports make_pair calls that spell out their template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  The raw (uncleansed) line is the one that gets searched.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  raw_line = clean_lines.raw_lines[linenum]
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(raw_line):
    return
  message = ('For C++11-compatibility, omit template arguments from make_pair'
             ' OR use pair directly OR if appropriate, construct a pair'
             ' directly')
  # Confidence 4 = high confidence.
  error(filename, linenum, 'build/explicit_make_pair', 4, message)
erg@google.com8a95ecc2011-09-08 00:45:54 +00003970
3971
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance containing the file.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each
                           function takes 4 arguments: filename, clean_lines,
                           line, error. None (the default) means no extra
                           checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  # No further checks run while the innermost block's inline_asm state is
  # anything other than _NO_ASM.
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # The default is None rather than a shared list object, so that no state
  # can ever be carried between calls through the default value.
  if extra_check_functions is not None:
    for check_fn in extra_check_functions:
      check_fn(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00004010
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
           The caller's list is not modified; a padded copy is checked.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each
                           function takes 4 arguments: filename, clean_lines,
                           line, error. None (the default) means no extra
                           checks.
  """
  # The default is None rather than a shared list object; turn it into a
  # fresh list here so the per-line processing always receives a sequence.
  if extra_check_functions is None:
    extra_check_functions = []

  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckClassFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
4055
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Files whose extension is not cc, h or cpp are skipped with a message on
  stderr; input read from stdin ('-') is always linted.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report. Every error of confidence
    >= verbose_level will be reported. 0 is a good default.

    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each
                           function takes 4 arguments: filename, clean_lines,
                           line, error. None (the default) means no extra
                           checks.
  """

  _SetVerboseLevel(vlevel)

  # The default is None rather than a shared list object; turn it into a
  # fresh list here so the file-level processing always receives a sequence.
  if extra_check_functions is None:
    extra_check_functions = []

  try:
    # Support the UNIX convention of using "-" for stdin. Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        # Always give the file handle back, even if reading fails.
        source_file.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Remove trailing '\r' and remember whether there was any.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found;'
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
4122
4123
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Never returns: the process is always ended through sys.exit.

  Args:
    message: The optional error message. When given, it is passed to
             sys.exit behind a 'FATAL ERROR: ' prefix; otherwise the exit
             status is 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
4135
4136
def PrintCategories():
  """Writes every error category to stderr, one per line, then exits with 0.

  These are the categories used to filter messages via --filter.
  """
  listing = ''.join([' %s\n' % category for category in _ERROR_CATEGORIES])
  sys.stderr.write(listing)
  sys.exit(0)
4144
4145
def ParseArguments(args):
  """Parses the command line arguments.

  This sets the output format, verbosity level, filters and counting style
  as side-effects, and the module-level _root when --root is given.

  Any invalid option or option value ends the program through PrintUsage;
  --help and an empty --filter value also end it after printing the usage
  text or the list of categories respectively.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  global _root

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # A non-numeric level is a usage error like any other bad option
      # value, not a reason for a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The verbose level must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty filter is a request to list the available categories.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
4200
4201
def main():
  """Lints every file named on the command line, then exits.

  The exit status is the truth value of "at least one error was counted".
  """
  files_to_lint = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr,
                                         utf8_reader,
                                         utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in files_to_lint:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  errors_found = _cpplint_state.error_count > 0
  sys.exit(errors_found)


if __name__ == '__main__':
  main()