blob: 7ca38625effc0f63808ecdd67fdb9a52f2036f95 [file] [log] [blame]
erg@google.com720121a2012-05-11 16:31:47 +00001#!/usr/bin/python
erg@google.com4e00b9a2009-01-12 23:05:11 +00002#
erg@google.com8f91ab22011-09-06 21:04:45 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
"""Does google-lint on c++ files.

The goal of this script is to identify places in the code that *may*
be in non-compliance with google style. It does not attempt to fix
up these problems -- the point is to educate. It also does not
attempt to find all problems, or to ensure that everything it does
find is legitimately a problem.

In particular, we can get very confused by /* and // inside strings!
We do a small hack, which is to ignore //'s with "'s after them on the
same line, but it is far from perfect (in either direction).
"""
43
44import codecs
erg@google.comd350fe52013-01-14 17:51:48 +000045import copy
erg@google.com4e00b9a2009-01-12 23:05:11 +000046import getopt
47import math # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
# Command-line help text for cpplint.py: syntax summary followed by a
# description of every supported flag.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--linelength=digits]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing. Visual Studio
      compatible output (vs7) may also be used. Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".) Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn. When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_

    linelength=digits
      This is the allowed line length for the project. The default value is
      80 characters.

      Examples:
        --linelength=120
"""
130
# Every error message is reported under one of the categories below.
# The list is explicit so that `cpplint --filter=` can print all of them.
# When introducing a message with a new category, register it here;
# cpplint_unittest.py should complain if you forget.
_ERROR_CATEGORIES = [
    # build/*
    'build/class', 'build/deprecated', 'build/endif_comment',
    'build/explicit_make_pair', 'build/forward_decl', 'build/header_guard',
    'build/include', 'build/include_alpha', 'build/include_order',
    'build/include_what_you_use', 'build/namespaces', 'build/printf_format',
    'build/storage_class',
    # legal/*
    'legal/copyright',
    # readability/*
    'readability/alt_tokens', 'readability/braces', 'readability/casting',
    'readability/check', 'readability/constructors', 'readability/fn_size',
    'readability/function', 'readability/multiline_comment',
    'readability/multiline_string', 'readability/namespace',
    'readability/nolint', 'readability/nul', 'readability/streams',
    'readability/todo', 'readability/utf8',
    # runtime/*
    'runtime/arrays', 'runtime/casting', 'runtime/explicit', 'runtime/int',
    'runtime/init', 'runtime/invalid_increment',
    'runtime/member_string_references', 'runtime/memset', 'runtime/operator',
    'runtime/printf', 'runtime/printf_format', 'runtime/references',
    'runtime/string', 'runtime/threadsafe_fn', 'runtime/vlog',
    # whitespace/*
    'whitespace/blank_line', 'whitespace/braces', 'whitespace/comma',
    'whitespace/comments', 'whitespace/empty_conditional_body',
    'whitespace/empty_loop_body', 'whitespace/end_of_line',
    'whitespace/ending_newline', 'whitespace/forcolon', 'whitespace/indent',
    'whitespace/line_length', 'whitespace/newline', 'whitespace/operators',
    'whitespace/parens', 'whitespace/semicolon', 'whitespace/tab',
    'whitespace/todo',
]
erg@google.com4e00b9a2009-01-12 23:05:11 +0000198
# Initial state of the category filter; the --filter= flag overrides it.
# All errors are enabled by default, so list here only the categories that
# should be off unless explicitly enabled through --filter=. As with the
# flag itself, every entry must begin with '-' or '+'.
_DEFAULT_FILTERS = [
    '-build/include_alpha',
]
erg@google.come35f7652009-06-19 20:52:09 +0000204
erg@google.com4e00b9a2009-01-12 23:05:11 +0000205# We used to check for high-bit characters, but after much discussion we
206# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com8a95ecc2011-09-08 00:45:54 +0000207# hard-coded international strings, which belong in a separate i18n file.
erg@google.com4e00b9a2009-01-12 23:05:11 +0000208
erg@google.com4e00b9a2009-01-12 23:05:11 +0000209
# Names of headers that are treated as C++ (standard library) headers.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h', 'algo.h', 'alloc.h', 'builtinbuf.h', 'bvector.h',
    'complex.h', 'defalloc.h', 'deque.h', 'editbuf.h', 'fstream.h',
    'function.h', 'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'hashtable.h', 'heap.h', 'indstream.h', 'iomanip.h', 'iostream.h',
    'istream.h', 'iterator.h', 'list.h', 'map.h', 'multimap.h',
    'multiset.h', 'ostream.h', 'pair.h', 'parsestream.h', 'pfstream.h',
    'procbuf.h', 'pthread_alloc', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'set.h', 'slist', 'slist.h', 'stack.h',
    'stdiostream.h', 'stl_alloc.h', 'stl_relops.h', 'streambuf.h',
    'stream.h', 'strfile.h', 'strstream.h', 'tempbuf.h', 'tree.h',
    'type_traits.h', 'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm', 'array', 'atomic', 'bitset', 'chrono', 'codecvt',
    'complex', 'condition_variable', 'deque', 'exception', 'forward_list',
    'fstream', 'functional', 'future', 'initializer_list', 'iomanip',
    'ios', 'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list',
    'locale', 'map', 'memory', 'mutex', 'new', 'numeric', 'ostream',
    'queue', 'random', 'ratio', 'regex', 'set', 'sstream', 'stack',
    'stdexcept', 'streambuf', 'string', 'strstream', 'system_error',
    'thread', 'tuple', 'typeindex', 'typeinfo', 'type_traits',
    'unordered_map', 'unordered_set', 'utility', 'valarray', 'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert', 'ccomplex', 'cctype', 'cerrno', 'cfenv', 'cfloat',
    'cinttypes', 'ciso646', 'climits', 'clocale', 'cmath', 'csetjmp',
    'csignal', 'cstdalign', 'cstdarg', 'cstdbool', 'cstddef', 'cstdint',
    'cstdio', 'cstdlib', 'cstring', 'ctgmath', 'ctime', 'cuchar',
    'cwchar', 'cwctype',
])
345
# Assertion macros, as defined in base/logging.h and testing/base/gunit.h.
# The _M variants must precede their plain counterparts so that substring
# matching finds the longer name first.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
]

# For each macro above: a map from comparison operator to the more specific
# macro that should replace CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE etc.
_CHECK_REPLACEMENT = {}
for m in _CHECK_MACROS:
    _CHECK_REPLACEMENT[m] = {}

# Macros asserting that the condition holds map directly to the operator.
for op, replacement in (('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')):
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_' + replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_' + replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_' + replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_' + replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_' + replacement + '_M'
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_' + replacement + '_M'

# Macros asserting that the condition is false map to the inverse operator.
for op, inv_replacement in (('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')):
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_' + inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_' + inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = (
        'EXPECT_' + inv_replacement + '_M')
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = (
        'ASSERT_' + inv_replacement + '_M')
377
# Alternative operator tokens mapped to the symbols they stand for. The full
# list is in section 2.5 "Alternative tokens" [lex.digraph] of the C++
# standard.
#
# Digraphs (such as '%:') are deliberately left out: matching them on a word
# boundary is a mess.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
}

# Regular expression matching any of the keywords above. The leading "[ =()]"
# is there to avoid matching the keywords outside of boolean expressions.
#
# C-style multi-line comments and multi-line strings can still produce false
# positives, but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT.keys()))
404
erg@google.com4e00b9a2009-01-12 23:05:11 +0000405
# Header type constants, passed to _IncludeState.CheckNextIncludeOrder().
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
413
# Inline assembly states:
#   _NO_ASM      outside of any inline assembly block
#   _INSIDE_ASM  inside an inline assembly block
#   _END_ASM     on the last line of an inline assembly block
#   _BLOCK_ASM   the whole block is an inline assembly block
_NO_ASM, _INSIDE_ASM, _END_ASM, _BLOCK_ASM = range(4)

# Recognizes the start of an assembly block: one of the asm keywords, an
# optional volatile qualifier, then an opening brace or parenthesis.
_MATCH_ASM = re.compile(
    r'^\s*(?:asm|_asm|__asm|__asm__)'
    r'(?:\s+(volatile|__volatile__))?'
    r'\s*[{(]')
424
erg@google.com4e00b9a2009-01-12 23:05:11 +0000425
# Compiled regular expressions, keyed by their pattern string.
_regexp_compile_cache = dict()

# Matches NOLINT or NOLINT(...); group 1 is the parenthesized part, if any.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: maps an error category to the set of line numbers on
# which errors of that category are expected and must be suppressed.
_error_suppressions = dict()

# Root directory used when deriving the header guard CPP variable.
# Set through the --root flag.
_root = None

# Maximum line length allowed in linted files.
# Set through the --linelength flag.
_line_length = 80
442
def ParseNolintSuppressions(filename, raw_line, linenum, error):
    """Records the NOLINT suppression found on a line, if there is one.

    Looks for a NOLINT comment in raw_line and registers the line number in
    the global _error_suppressions store. A NOLINT naming a category that is
    not in _ERROR_CATEGORIES is reported through the error handler.

    Args:
      filename: str, the name of the input file.
      raw_line: str, the line of input text, with comments.
      linenum: int, the number of the current line.
      error: function, an error handler.
    """
    # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
    found = _RE_SUPPRESSION.search(raw_line)
    if not found:
        return

    spec = found.group(1)
    if spec is None or spec == '(*)':
        # Bare NOLINT or NOLINT(*): suppress every category on this line.
        _error_suppressions.setdefault(None, set()).add(linenum)
        return

    # Strip the surrounding parentheses to get the category name.
    if spec.startswith('(') and spec.endswith(')'):
        spec = spec[1:-1]

    if spec in _ERROR_CATEGORIES:
        _error_suppressions.setdefault(spec, set()).add(linenum)
    else:
        error(filename, linenum, 'readability/nolint', 5,
              'Unknown NOLINT error category: %s' % spec)
erg+personal@google.com05189642010-04-30 20:43:03 +0000470
471
def ResetNolintSuppressions():
    """Empties the global store of NOLINT suppressions."""
    _error_suppressions.clear()
475
476
def IsErrorSuppressedByNolint(category, linenum):
    """Tells whether a NOLINT comment suppresses this category on this line.

    Reads the global _error_suppressions map, which is filled in by
    ParseNolintSuppressions and emptied by ResetNolintSuppressions.

    Args:
      category: str, the category of the error.
      linenum: int, the current line number.
    Returns:
      bool, True iff the error should be suppressed due to a NOLINT comment.
    """
    # The None key holds the lines on which all categories are suppressed.
    for key in (category, None):
        if linenum in _error_suppressions.get(key, set()):
            return True
    return False
erg@google.com4e00b9a2009-01-12 23:05:11 +0000491
def Match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: str, the regular expression to apply.
      s: str, the text to match; matching is anchored at its start.

    Returns:
      The match object, or None if the pattern does not match.
    """
    # The cache lookup is deliberately inlined in Match, ReplaceAll and Search
    # for performance reasons; factoring it out into a separate function
    # turns out to be noticeably expensive.
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is a private
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)
500
501
def ReplaceAll(pattern, rep, s):
    """Replaces instances of pattern in a string with a replacement.

    The compiled regex is kept in a cache shared by Match and Search.

    Args:
      pattern: regex pattern
      rep: replacement text
      s: search string

    Returns:
      string with replacements made (or original string if no replacements)
    """
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is a private
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].sub(rep, s)
518
519
def Search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: str, the regular expression to look for.
      s: str, the text to scan.

    Returns:
      The match object for the first occurrence, or None if there is none.
    """
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is a private
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
525
526
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which they appear.

    The dict part is meant to map each include filename to the line number
    on which that file was included; this class never fills it in itself.

    Call CheckNextIncludeOrder() once per header in the file, passing one of
    the _XXX_HEADER type constants. It returns an error message when the
    header appears in an illegal position.
    """
    # self._section only ever moves forward through these values. A header
    # that would require moving backwards makes CheckNextIncludeOrder
    # report an error.
    _INITIAL_SECTION = 0
    _MY_H_SECTION = 1
    _C_SECTION = 2
    _CPP_SECTION = 3
    _OTHER_H_SECTION = 4

    # Human-readable names used to build the error message.
    _TYPE_NAMES = {
        _C_SYS_HEADER: 'C system header',
        _CPP_SYS_HEADER: 'C++ system header',
        _LIKELY_MY_HEADER: 'header this file implements',
        _POSSIBLE_MY_HEADER: 'header this file may implement',
        _OTHER_HEADER: 'other header',
    }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing. (This can't be an error.)",
        _MY_H_SECTION: 'a header this file implements',
        _C_SECTION: 'C system header',
        _CPP_SECTION: 'C++ system header',
        _OTHER_H_SECTION: 'other header',
    }

    def __init__(self):
        dict.__init__(self)
        self.ResetSection()

    def ResetSection(self):
        """Returns to the initial section and forgets the last header."""
        # The path of last found header.
        self._last_header = ''
        # The current section.
        self._section = self._INITIAL_SECTION

    def SetLastHeader(self, header_path):
        """Remembers header_path as the most recently seen header."""
        self._last_header = header_path

    def CanonicalizeAlphabeticalOrder(self, header_path):
        """Returns a path canonicalized for alphabetical comparison.

        - removes '-inl' since we don't require them to be after the main
          header.
        - replaces "-" with "_" so they both cmp the same.
        - lowercase everything, just in case.

        Args:
          header_path: Path to be canonicalized.

        Returns:
          Canonicalized path.
        """
        without_inl = header_path.replace('-inl.h', '.h')
        underscored = without_inl.replace('-', '_')
        return underscored.lower()

    def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
        """Check if a header is in alphabetical order with the previous header.

        Args:
          clean_lines: A CleansedLines instance containing the file.
          linenum: The number of the line to check.
          header_path: Canonicalized header to be checked.

        Returns:
          Returns true if the header is in alphabetical order.
        """
        # When the section changes, _last_header is reset to the empty
        # string, which never sorts after the current header.
        if self._last_header <= header_path:
            return True
        # A blank line just before this include is taken to mean that the
        # headers are intentionally grouped the way they are.
        return bool(Match(r'^\s*$', clean_lines.elided[linenum - 1]))

    def CheckNextIncludeOrder(self, header_type):
        """Returns a non-empty error message if the next header is out of order.

        Also advances the internal state so that it is ready for the next
        include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        previous_section = self._section
        # Built up front: it must name the section we were in before any
        # state change below.
        message = 'Found %s after %s' % (self._TYPE_NAMES[header_type],
                                         self._SECTION_NAMES[previous_section])

        if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
            # System headers may not follow a later section.
            if header_type == _C_SYS_HEADER:
                target = self._C_SECTION
            else:
                target = self._CPP_SECTION
            if previous_section > target:
                self._last_header = ''
                return message
            self._section = target
        elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
            if previous_section <= self._MY_H_SECTION:
                self._section = self._MY_H_SECTION
            else:
                # Past the "my header" slot the header is simply treated as
                # any other header. For a merely possible match this is the
                # usual outcome, since we are not sure enough that it
                # belongs to this file.
                self._section = self._OTHER_H_SECTION
        else:
            assert header_type == _OTHER_HEADER
            self._section = self._OTHER_H_SECTION

        # Alphabetical ordering restarts with every new section.
        if self._section != previous_section:
            self._last_header = ''

        return ''
662
663
class _CppLintState(object):
    """Maintains module-wide state: verbosity, filters, counts, output format."""

    def __init__(self):
        self.verbose_level = 1  # global setting.
        self.error_count = 0    # global count of reported errors
        # filters to apply when emitting error messages
        self.filters = _DEFAULT_FILTERS[:]
        self.counting = 'total'  # In what way are we counting errors?
        self.errors_by_category = {}  # string to int dict storing error counts

        # output format:
        # "emacs" - format that emacs can parse (default)
        # "vs7" - format that Microsoft Visual Studio 7 can parse
        self.output_format = 'emacs'

    def SetOutputFormat(self, output_format):
        """Sets the output format for errors."""
        self.output_format = output_format

    def SetVerboseLevel(self, level):
        """Sets the module's verbosity, and returns the previous setting."""
        last_verbose_level = self.verbose_level
        self.verbose_level = level
        return last_verbose_level

    def SetCountingStyle(self, counting_style):
        """Sets the module's counting options."""
        self.counting = counting_style

    def SetFilters(self, filters):
        """Sets the error-message filters.

        These filters are applied when deciding whether to emit a given
        error message. The default filters come first, so the ones given
        here take priority over them.

        Args:
          filters: A string of comma-separated filters (eg
                   "+whitespace/indent"). Each filter should start with
                   + or -; else we die.

        Raises:
          ValueError: The comma-separated filters did not all start with
                      '+' or '-'. E.g.
                      "-,+whitespace,-whitespace/indent,whitespace/badfilter"
                      In that case the current filters are left untouched.
        """
        # Default filters always have less priority than the flag ones.
        new_filters = _DEFAULT_FILTERS[:]
        for filt in filters.split(','):
            clean_filt = filt.strip()
            if clean_filt:
                new_filters.append(clean_filt)
        # Validate before installing, so that a bad argument cannot leave
        # the state holding an invalid filter.
        for filt in new_filters:
            if not filt.startswith(('+', '-')):
                raise ValueError('Every filter in --filter must start with'
                                 ' + or - (%s does not)' % filt)
        self.filters = new_filters

    def ResetErrorCounts(self):
        """Sets the module's error statistic back to zero."""
        self.error_count = 0
        self.errors_by_category = {}

    def IncrementErrorCount(self, category):
        """Bumps the module's error statistic.

        Args:
          category: str, the category of the error, e.g. 'build/class'.
        """
        self.error_count += 1
        if self.counting in ('toplevel', 'detailed'):
            if self.counting != 'detailed':
                # 'toplevel' counting only keeps the part before the slash.
                category = category.split('/')[0]
            self.errors_by_category[category] = (
                self.errors_by_category.get(category, 0) + 1)

    def PrintErrorCounts(self):
        """Print a summary of errors by category, and the total."""
        # items() rather than iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for category, count in self.errors_by_category.items():
            sys.stderr.write('Category \'%s\' errors found: %d\n' %
                             (category, count))
        sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000740
# The single module-wide state object used by the accessor functions below.
_cpplint_state = _CppLintState()
742
743
def _OutputFormat():
    """Returns the output format currently stored in the module state."""
    state = _cpplint_state
    return state.output_format
747
748
def _SetOutputFormat(output_format):
    """Stores a new output format in the module state.

    Args:
      output_format: the format to use when reporting errors.
    """
    state = _cpplint_state
    state.SetOutputFormat(output_format)
752
753
def _VerboseLevel():
    """Returns the verbosity level currently stored in the module state."""
    state = _cpplint_state
    return state.verbose_level
757
758
def _SetVerboseLevel(level):
    """Changes the module's verbosity.

    Args:
      level: the new verbosity level.

    Returns:
      The verbosity level that was in effect before the call.
    """
    previous_level = _cpplint_state.SetVerboseLevel(level)
    return previous_level
762
763
def _SetCountingStyle(level):
    """Stores a new error-counting style in the module state.

    Args:
      level: the counting style, forwarded unchanged to the state object.
    """
    state = _cpplint_state
    state.SetCountingStyle(level)
767
768
def _Filters():
    """Returns the list of output filters held by the module state."""
    state = _cpplint_state
    return state.filters
772
773
def _SetFilters(filters):
    """Replaces the module's error-message filters.

    The filters decide whether a given error message is emitted.

    Args:
      filters: A string of comma-separated filters (eg "whitespace/indent").
               Each filter should start with + or -; else we die.
    """
    state = _cpplint_state
    state.SetFilters(filters)
785
786
class _FunctionState(object):
    """Tracks the current function's name and how many lines its body has."""

    # Line-count limits at verbosity 0; Check() doubles them for every
    # verbosity level.
    _NORMAL_TRIGGER = 250
    # More lenient limit, used when the function name starts with TEST/Test.
    _TEST_TRIGGER = 400

    def __init__(self):
        self.current_function = ''
        self.lines_in_function = 0
        self.in_a_function = False

    def Begin(self, function_name):
        """Start analyzing function body.

        Args:
          function_name: The name of the function being tracked.
        """
        self.current_function = function_name
        self.lines_in_function = 0
        self.in_a_function = True

    def Count(self):
        """Count line in current function body."""
        if not self.in_a_function:
            return
        self.lines_in_function += 1

    def Check(self, error, filename, linenum):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          filename: The name of the current file.
          linenum: The number of the line to check.
        """
        if Match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2**_VerboseLevel()

        if self.lines_in_function <= trigger:
            return

        # The confidence is the base-2 logarithm of how many times the body
        # exceeds the base limit, truncated to an int and capped at 5.
        error_level = min(
            5, int(math.log(self.lines_in_function / base_trigger, 2)))
        error(filename, linenum, 'readability/fn_size', error_level,
              'Small and focused functions are preferred:'
              ' %s has %d non-comment lines'
              ' (error triggered by exceeding %d lines).' % (
                  self.current_function, self.lines_in_function, trigger))

    def End(self):
        """Stop analyzing function body."""
        self.in_a_function = False
841
842
843class _IncludeError(Exception):
844 """Indicates a problem with the include order in a file."""
845 pass
846
847
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  # Directory entries whose presence marks the top of a checkout.
  _VCS_MARKERS = ('.git', '.hg', '.svn')

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file, with every backslash replaced by '/'.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  @classmethod
  def _HasVcsMarker(cls, directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return any(os.path.exists(os.path.join(directory, marker))
               for marker in cls._VCS_MARKERS)

  @staticmethod
  def _StripRoot(fullname, root_dir, project_dir):
    """Returns fullname with the checkout root and its separator removed."""
    prefix = os.path.commonprefix([root_dir, project_dir])
    # A filesystem or drive root ('/' or 'C:/') already ends with the
    # separator; dropping it first keeps the "+ 1" from eating the first
    # character of the relative path.
    return fullname[len(prefix.rstrip('/')) + 1:]

  def RepositoryName(self):
    """FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something
    smart: detecting the root of the checkout and truncating
    /path/to/checkout from the name so that we get header guards that don't
    include things like "/home/username/..." (or a Windows profile
    directory) in them and thus people on different computers who have
    checked the source out to different locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or FullName() when
      the file does not exist or no checkout root can be found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we walk up the
        # directory tree to the top of the SVN checkout.
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # The inequality stops the walk at the filesystem root, where
        # dirname() returns its own argument.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = one_up_dir
          one_up_dir = os.path.dirname(one_up_dir)

        return self._StripRoot(fullname, root_dir, project_dir)

      # Not SVN <= 1.6?  Try to find a git, hg, or svn top level directory
      # by searching up from the current path.
      root_dir = project_dir
      while (root_dir != os.path.dirname(root_dir) and
             not self._HasVcsMarker(root_dir)):
        root_dir = os.path.dirname(root_dir)

      if self._HasVcsMarker(root_dir):
        return self._StripRoot(fullname, root_dir, project_dir)

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - text following the final period (period included)."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
936
937
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of the given category should be reported.

  An error is dropped when a NOLINT comment suppresses it on that line,
  when its confidence is below the current verbosity level, or when the
  last filter that matches its category is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error refers to.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if (IsErrorSuppressedByNolint(category, linenum) or
      confidence < _cpplint_state.verbose_level):
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  hidden = False
  for rule in _Filters():
    applies = category.startswith(rule[1:])
    if rule.startswith('-'):
      if applies:
        hidden = True
    elif rule.startswith('+'):
      if applies:
        hidden = False
    else:
      assert False  # should have been checked for in SetFilter.

  return not hidden
963
964
def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  We log where the error was found, and also our confidence in the error,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  False positives can be suppressed by the use of "NOLINT(category)"
  comments on the offending line; _ShouldPrintError consults those
  suppressions before anything is written.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # Only the line layout depends on the output format; the fields do not.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(
      template % (filename, linenum, message, category, confidence))
998
999
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it
# also decides which neighbouring whitespace is removed together with the
# comment, so that comments inside statements are handled better.
# The current rule is: we only clear spaces from both sides when the comment
# is at the end of the line.  Otherwise, we try to remove spaces from the
# right side; if that doesn't work we try the left side, but only if there
# is a non-word character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
 /\*.*\*/\s+|
 \s+/\*.*\*/(?=\W)|
 /\*.*\*/)""", re.VERBOSE)
1020
1021
def IsCppString(line):
  """Tells whether the text right after 'line' would be inside a string.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralise escaped backslashes first; after this, \\" is no longer
  # mistaken for the escaped quote \".
  text = line.replace(r'\\', 'XX')

  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  quote_char_literals = text.count("'\"'")

  # An odd number of real string delimiters means a string is still open.
  return (all_quotes - escaped_quotes - quote_char_literals) % 2 == 1
1037
1038
def CleanseRawStrings(raw_lines):
  """Replaces C++11 raw string literals with empty strings.

  Before:
    static const char kData[] = R"(
        multi-line string
        )";

  After:
    static const char kData[] = ""
        (replaced by blank line)
        "";

  Only one raw string opener is recognised per line (the last one, since
  the leading group of the pattern is greedy).

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """
  # Closing token of the raw string we are currently inside, or None.
  closing = None
  cleansed = []
  for line in raw_lines:
    if closing:
      close_at = line.find(closing)
      if close_at < 0:
        # Still inside the raw string: the whole line is string content.
        line = ''
      else:
        # The raw string ends here.  Keep this line's indentation, put a
        # "" in place of the tail of the string and keep the rest.
        indent = Match(r'^(\s*)\S', line).group(1)
        line = indent + '""' + line[close_at + len(closing):]
        closing = None
    else:
      # Look for the beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      opener = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if opener:
        before = opener.group(1)
        after = opener.group(3)
        closing = ')' + opener.group(2) + '"'

        close_at = after.find(closing)
        if close_at < 0:
          # Start of a multi-line raw string.
          line = before + '""'
        else:
          # Raw string ended on the same line.
          line = before + '""' + after[close_at + len(closing):]
          closing = None

    cleansed.append(line)

  # TODO(unknown): if closing is not None here, we might want to
  # emit a warning for unterminated string.
  return cleansed
1098
1099
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment spanning several lines.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, ignoring
    surrounding whitespace, starts with '/*' and has no '*/' after it;
    len(lines) if there is no such line.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # A comment that also closes on this line is not a multi-line comment.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
1109
1110
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that, ignoring
    surrounding whitespace, ends with '*/'; len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1118
1119
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with placeholder comments.

  Args:
    lines: The list of lines; modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so later checks do not
  # report unnecessary blank lines where the comment used to be.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1126
1127
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: The list of lines; modified in place.
    error: The function to call with any errors found.
  """
  total = len(lines)
  cursor = 0
  while cursor < total:
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= total:
      # No further comment opens.
      return
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= total:
      # Reported with a 1-based line number.
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    cursor = last + 1
    RemoveMultiLineCommentsFromRange(lines, first, cursor)
1142
1143
def CleanseComments(line):
  """Strips //-comments and one-line /* ... */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_at = line.find('//')
  # A '//' that sits inside a string literal is not a comment.
  if slash_at >= 0 and not IsCppString(line[:slash_at]):
    line = line[:slash_at].rstrip()
  # Then get rid of /* ... */ comments.
  without_c_comments = _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
  return without_c_comments
1158
1159
class CleansedLines(object):
  """Holds parallel copies of all lines with different preprocessing.

  1) raw_lines: the lines exactly as passed in,
  2) lines_without_raw_strings: raw_lines after CleanseRawStrings,
  3) lines: (2) with comments removed, and
  4) elided: (2) with strings and chars collapsed and comments removed.
  All of these are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    self.lines = []
    self.elided = []
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      collapsed = self._CollapseStrings(text)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings; lines matching _RE_PATTERN_INCLUDE
      are returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
1205
1206
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than xrange() so the function runs on Python 2 and 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char == startchar:
      depth += 1
    elif char == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
erg@google.comd350fe52013-01-14 17:51:48 +00001229
1230
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [ or <, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  closers = {'(': ')', '[': ']', '{': '}', '<': '>'}

  line = clean_lines.elided[linenum]
  opener = line[pos]
  closer = closers.get(opener)
  if closer is None:
    # Not positioned on an opening bracket.
    return (line, clean_lines.NumLines(), -1)

  # Scan the rest of the first line, then whole lines, carrying the
  # nesting depth forward until the bracket closes or the file ends.
  end_pos, depth = FindEndOfExpressionInLine(line, pos, 0, opener, closer)
  while end_pos < 0 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    end_pos, depth = FindEndOfExpressionInLine(line, 0, depth, opener, closer)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find the closing character before end of file, give up.
  return (line, clean_lines.NumLines(), -1)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001275
erg@google.com2aa59982013-10-28 19:09:25 +00001276
def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
  """Find position at the matching startchar.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    depth: nesting level at endpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching startchar: (index at matching startchar, 0)
    Otherwise: (-1, new depth at beginning of this line)
  """
  # range() rather than xrange() so the function runs on Python 2 and 3.
  # An endpos of -1 (empty line) yields an empty range.
  for i in range(endpos, -1, -1):
    char = line[i]
    if char == endchar:
      depth += 1
    elif char == startchar:
      depth -= 1
      if depth == 0:
        return (i, 0)
  return (-1, depth)
1302
1303
def ReverseCloseExpression(clean_lines, linenum, pos):
  """If input points to ) or } or ] or >, finds the position that opens it.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  openers = {')': '(', ']': '[', '}': '{', '>': '<'}

  line = clean_lines.elided[linenum]
  closer = line[pos]
  opener = openers.get(closer)
  if opener is None:
    # Not positioned on a closing bracket.
    return (line, 0, -1)

  # Scan the last line leftwards from pos, then whole lines from their end,
  # carrying the nesting depth backward until the bracket opens or the
  # beginning of the file is reached.
  start_pos, depth = FindStartOfExpressionInLine(line, pos, 0, opener, closer)
  while start_pos < 0 and linenum > 0:
    linenum -= 1
    line = clean_lines.elided[linenum]
    start_pos, depth = FindStartOfExpressionInLine(
        line, len(line) - 1, depth, opener, closer)

  if start_pos > -1:
    return (line, linenum, start_pos)

  # Did not find the opening character before beginning of file, give up.
  return (line, 0, -1)
1347
1348
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
      Index 0 is a dummy line and is not inspected.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10.  Don't forget there's a
  # dummy line at the front, hence the slice starting at index 1.
  if any(re.search(r'Copyright', line, re.I) for line in lines[1:11]):
    return
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1360
1361
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file: the repository-relative path (with the --root prefix
    removed when _root is set and the path starts with it), with every
    '-', '.', '/' and whitespace character replaced by '_', upper-cased,
    and with a trailing '_'.
  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  file_path_from_root = FileInfo(filename).RepositoryName()
  if _root:
    # _root is a literal directory prefix, not a regular expression, so it
    # is compared as plain text.  RepositoryName() always uses '/' as the
    # separator (FullName() normalises backslashes), so _root is normalised
    # the same way instead of appending os.sep.
    root_prefix = _root.replace(os.sep, '/').rstrip('/') + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com4e00b9a2009-01-12 23:05:11 +00001384
1385
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a correctly named header guard.

  Logs an error if no #ifndef / #define header guard is present.  When one
  is present, checks that the #ifndef, the #define and the final #endif
  comment all use the CPP variable derived from the file's path.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = define = endif = None
  ifndef_linenum = endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define count.
      if directive == '#ifndef' and not ifndef:
        ifndef = argument
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = argument
    # Only the last #endif counts; the entire line is kept.
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility (reported with confidence 0).
  legacy_cppvar = cppvar + '_'
  if ifndef != cppvar:
    error_level = 5 if ifndef != legacy_cppvar else 0
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  # NOTE(review): the spacing inside the '#endif // ...' literals below is
  # reproduced as seen in this copy of the file; confirm it against the
  # canonical source if this copy went through a whitespace-collapsing view.
  if endif != ('#endif // %s' % cppvar):
    error_level = 5 if endif != ('#endif // %s' % legacy_cppvar) else 0
    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1459
1460
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a character we consider bad.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  nul_char = '\0'
  for linenum, text in enumerate(lines):
    has_replacement = replacement_char in text
    has_nul = nul_char in text
    if has_replacement:
      error(filename, linenum, 'readability/utf8', 5,
            'Line contains invalid UTF-8 (or Unicode replacement character).')
    if has_nul:
      error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
erg@google.com4e00b9a2009-01-12 23:05:11 +00001484
1485
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1502
1503
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings left open on a line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line.  Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary.  We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop all \\ (escaped backslashes) first.  They are OK, and the second
  # (escaped) backslash could otherwise be read as the start of a \".
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. '
          'Use C++11 raw strings or concatenation instead.')
erg@google.com4e00b9a2009-01-12 23:05:11 +00001540
1541
# Pairs of (thread-unsafe call prefix, suggested replacement prefix) used by
# CheckPosixThreading.  Each entry ends with '(' so that only calls are
# matched by the plain substring search.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1556
1557
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Warns about calls to thread-unsafe posix functions.

  Much code has been originally written without consideration of
  multi-threading.  Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added.  These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_call, safe_call in threading_list:
    # Only the first occurrence on the line is examined.
    found_at = line.find(unsafe_call)
    if found_at < 0:
      continue
    if found_at > 0:
      # Skip when the name is merely the tail of a longer identifier, or a
      # member access (preceded by '.' or the '>' of '->').
      preceding = line[found_at - 1]
      if preceding.isalnum() or preceding in ('_', '.', '>'):
        continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_call +
          '...) instead of ' + unsafe_call +
          '...) for improved thread safety.')
1583
1584
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Flags VLOG() calls that are given a symbolic severity.

  VLOG() takes a numeric verbosity level, e.g. VLOG(2). Passing one of the
  LOG() severities -- VLOG(INFO), VLOG(ERROR), VLOG(WARNING), VLOG(DFATAL)
  or VLOG(FATAL) -- is reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  symbolic_severity = Search(
      r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
      clean_lines.elided[linenum])
  if not symbolic_severity:
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level. '
        'Use LOG() if you want symbolic severity levels.')
1602
1603
# Recognizes a statement of the form "*count++;" or "*count--;". Such a
# statement moves the pointer rather than changing the pointed-to value.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements such as *count++ that step the pointer itself.

  Given:
    void increment_counter(int* count) {
      *count++;
    }
  the statement effectively performs count++ (the pointer moves and the
  dereferenced value is discarded). The intended form is ++*count,
  (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1630
1631
erg@google.comd350fe52013-01-14 17:51:48 +00001632class _BlockInfo(object):
1633 """Stores information about a generic block of code."""
1634
1635 def __init__(self, seen_open_brace):
1636 self.seen_open_brace = seen_open_brace
1637 self.open_parentheses = 0
1638 self.inline_asm = _NO_ASM
1639
1640 def CheckBegin(self, filename, clean_lines, linenum, error):
1641 """Run checks that applies to text up to the opening brace.
1642
1643 This is mostly for checking the text after the class identifier
1644 and the "{", usually where the base class is specified. For other
1645 blocks, there isn't much to check, so we always pass.
1646
1647 Args:
1648 filename: The name of the current file.
1649 clean_lines: A CleansedLines instance containing the file.
1650 linenum: The number of the line to check.
1651 error: The function to call with any errors found.
1652 """
1653 pass
1654
1655 def CheckEnd(self, filename, clean_lines, linenum, error):
1656 """Run checks that applies to text after the closing brace.
1657
1658 This is mostly used for checking end of namespace comments.
1659
1660 Args:
1661 filename: The name of the current file.
1662 clean_lines: A CleansedLines instance containing the file.
1663 linenum: The number of the line to check.
1664 error: The function to call with any errors found.
1665 """
1666 pass
1667
1668
class _ClassInfo(_BlockInfo):
  """Nesting-stack record for a class or struct."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    """Creates the record for a class/struct declared at linenum.

    Args:
      name: Name of the class or struct.
      class_or_struct: The keyword used in the declaration; 'struct' selects
        struct semantics, anything else is treated as a class.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The line on which the declaration starts.
    """
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False

    # Default member access: public for structs, private for classes.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Remember the indentation of the declaration. raw_lines is used rather
    # than elided so that leading comments are taken into account.
    leading_spaces = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    self.class_indent = len(leading_spaces.group(1)) if leading_spaces else 0

    # Try to find the end of the class by balancing braces. This will be
    # confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_balance = 0
    for candidate in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[candidate]
      brace_balance += text.count('{') - text.count('}')
      if brace_balance == 0:
        self.last_line = candidate
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived when its head contains a bare ':'."""
    has_bare_colon = Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum])
    if has_bare_colon:
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the class declaration.

    The check only applies when the closing brace is preceded by nothing
    but spaces, so single-line class definitions are not checked.
    """
    brace_indent = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not brace_indent:
      return
    if len(brace_indent.group(1)) == self.class_indent:
      return

    keyword = 'struct ' if self.is_struct else 'class '
    parent = keyword + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1723
erg@google.com4e00b9a2009-01-12 23:05:11 +00001724
class _NamespaceInfo(_BlockInfo):
  """Nesting-stack record for a namespace."""

  def __init__(self, name, linenum):
    """Creates the record.

    Args:
      name: Namespace name; a false value (anonymous namespace) is stored
        as the empty string.
      linenum: The line on which the namespace starts.
    """
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    closing_line = clean_lines.raw_lines[linenum]

    # Short namespaces are not required to carry a terminating comment, but
    # if one is already present it is still validated below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations). There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b',
                              closing_line):
      return

    # Work out which terminating comment is expected.
    #
    # C style "/* */" comments are accepted too, so that code terminating
    # namespaces inside preprocessor macros can be cpplint clean. A
    # trailing period, as in "// end of namespace <name>.", is accepted as
    # well. Nothing else is accepted; otherwise an existing comment that is
    # merely a substring of the expected namespace would slip through.
    if self.name:
      # Named namespace
      expected = (r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                  r'[\*/\.\\\s]*$')
      message = ('Namespace should be terminated with "// namespace %s"' %
                 self.name)
    else:
      # Anonymous namespace
      expected = r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$'
      message = 'Namespace should be terminated with "// namespace"'

    if not Match(expected, closing_line):
      error(filename, linenum, 'readability/namespace', 5, message)
1777
1778
1779class _PreprocessorInfo(object):
1780 """Stores checkpoints of nesting stacks when #if/#else is seen."""
1781
1782 def __init__(self, stack_before_if):
1783 # The entire nesting stack before #if
1784 self.stack_before_if = stack_before_if
1785
1786 # The entire nesting stack up to #else
1787 self.stack_before_else = []
1788
1789 # Whether we have already seen #else or #elif
1790 self.seen_else = False
1791
1792
1793class _NestingState(object):
1794 """Holds states related to parsing braces."""
erg@google.com4e00b9a2009-01-12 23:05:11 +00001795
1796 def __init__(self):
erg@google.comd350fe52013-01-14 17:51:48 +00001797 # Stack for tracking all braces. An object is pushed whenever we
1798 # see a "{", and popped when we see a "}". Only 3 types of
1799 # objects are possible:
1800 # - _ClassInfo: a class or struct.
1801 # - _NamespaceInfo: a namespace.
1802 # - _BlockInfo: some other type of block.
1803 self.stack = []
erg@google.com4e00b9a2009-01-12 23:05:11 +00001804
erg@google.comd350fe52013-01-14 17:51:48 +00001805 # Stack of _PreprocessorInfo objects.
1806 self.pp_stack = []
1807
1808 def SeenOpenBrace(self):
1809 """Check if we have seen the opening brace for the innermost block.
1810
1811 Returns:
1812 True if we have seen the opening brace, False if the innermost
1813 block is still expecting an opening brace.
1814 """
1815 return (not self.stack) or self.stack[-1].seen_open_brace
1816
1817 def InNamespaceBody(self):
1818 """Check if we are currently one level inside a namespace body.
1819
1820 Returns:
1821 True if top of the stack is a namespace block, False otherwise.
1822 """
1823 return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
1824
1825 def UpdatePreprocessor(self, line):
1826 """Update preprocessor stack.
1827
1828 We need to handle preprocessors due to classes like this:
1829 #ifdef SWIG
1830 struct ResultDetailsPageElementExtensionPoint {
1831 #else
1832 struct ResultDetailsPageElementExtensionPoint : public Extension {
1833 #endif
erg@google.comd350fe52013-01-14 17:51:48 +00001834
1835 We make the following assumptions (good enough for most files):
1836 - Preprocessor condition evaluates to true from #if up to first
1837 #else/#elif/#endif.
1838
1839 - Preprocessor condition evaluates to false from #else/#elif up
1840 to #endif. We still perform lint checks on these lines, but
1841 these do not affect nesting stack.
1842
1843 Args:
1844 line: current line to check.
1845 """
1846 if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
1847 # Beginning of #if block, save the nesting stack here. The saved
1848 # stack will allow us to restore the parsing state in the #else case.
1849 self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
1850 elif Match(r'^\s*#\s*(else|elif)\b', line):
1851 # Beginning of #else block
1852 if self.pp_stack:
1853 if not self.pp_stack[-1].seen_else:
1854 # This is the first #else or #elif block. Remember the
1855 # whole nesting stack up to this point. This is what we
1856 # keep after the #endif.
1857 self.pp_stack[-1].seen_else = True
1858 self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
1859
1860 # Restore the stack to how it was before the #if
1861 self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
1862 else:
1863 # TODO(unknown): unexpected #else, issue warning?
1864 pass
1865 elif Match(r'^\s*#\s*endif\b', line):
1866 # End of #if or #else blocks.
1867 if self.pp_stack:
1868 # If we saw an #else, we will need to restore the nesting
1869 # stack to its former state before the #else, otherwise we
1870 # will just continue from where we left off.
1871 if self.pp_stack[-1].seen_else:
1872 # Here we can just use a shallow copy since we are the last
1873 # reference to it.
1874 self.stack = self.pp_stack[-1].stack_before_else
1875 # Drop the corresponding #if
1876 self.pp_stack.pop()
1877 else:
1878 # TODO(unknown): unexpected #endif, issue warning?
1879 pass
1880
1881 def Update(self, filename, clean_lines, linenum, error):
1882 """Update nesting state with current line.
1883
1884 Args:
1885 filename: The name of the current file.
1886 clean_lines: A CleansedLines instance containing the file.
1887 linenum: The number of the line to check.
1888 error: The function to call with any errors found.
1889 """
1890 line = clean_lines.elided[linenum]
1891
1892 # Update pp_stack first
1893 self.UpdatePreprocessor(line)
1894
1895 # Count parentheses. This is to avoid adding struct arguments to
1896 # the nesting stack.
1897 if self.stack:
1898 inner_block = self.stack[-1]
1899 depth_change = line.count('(') - line.count(')')
1900 inner_block.open_parentheses += depth_change
1901
1902 # Also check if we are starting or ending an inline assembly block.
1903 if inner_block.inline_asm in (_NO_ASM, _END_ASM):
1904 if (depth_change != 0 and
1905 inner_block.open_parentheses == 1 and
1906 _MATCH_ASM.match(line)):
1907 # Enter assembly block
1908 inner_block.inline_asm = _INSIDE_ASM
1909 else:
1910 # Not entering assembly block. If previous line was _END_ASM,
1911 # we will now shift to _NO_ASM state.
1912 inner_block.inline_asm = _NO_ASM
1913 elif (inner_block.inline_asm == _INSIDE_ASM and
1914 inner_block.open_parentheses == 0):
1915 # Exit assembly block
1916 inner_block.inline_asm = _END_ASM
1917
1918 # Consume namespace declaration at the beginning of the line. Do
1919 # this in a loop so that we catch same line declarations like this:
1920 # namespace proto2 { namespace bridge { class MessageSet; } }
1921 while True:
1922 # Match start of namespace. The "\b\s*" below catches namespace
1923 # declarations even if it weren't followed by a whitespace, this
1924 # is so that we don't confuse our namespace checker. The
1925 # missing spaces will be flagged by CheckSpacing.
1926 namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
1927 if not namespace_decl_match:
1928 break
1929
1930 new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
1931 self.stack.append(new_namespace)
1932
1933 line = namespace_decl_match.group(2)
1934 if line.find('{') != -1:
1935 new_namespace.seen_open_brace = True
1936 line = line[line.find('{') + 1:]
1937
1938 # Look for a class declaration in whatever is left of the line
1939 # after parsing namespaces. The regexp accounts for decorated classes
1940 # such as in:
1941 # class LOCKABLE API Object {
1942 # };
1943 #
1944 # Templates with class arguments may confuse the parser, for example:
1945 # template <class T
1946 # class Comparator = less<T>,
1947 # class Vector = vector<T> >
1948 # class HeapQueue {
1949 #
1950 # Because this parser has no nesting state about templates, by the
1951 # time it saw "class Comparator", it may think that it's a new class.
1952 # Nested templates have a similar problem:
1953 # template <
1954 # typename ExportedType,
1955 # typename TupleType,
1956 # template <typename, typename> class ImplTemplate>
1957 #
1958 # To avoid these cases, we ignore classes that are followed by '=' or '>'
1959 class_decl_match = Match(
1960 r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
erg@google.comfd5da632013-10-25 17:39:45 +00001961 r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
1962 r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
erg@google.comd350fe52013-01-14 17:51:48 +00001963 if (class_decl_match and
1964 (not self.stack or self.stack[-1].open_parentheses == 0)):
1965 self.stack.append(_ClassInfo(
1966 class_decl_match.group(4), class_decl_match.group(2),
1967 clean_lines, linenum))
1968 line = class_decl_match.group(5)
1969
1970 # If we have not yet seen the opening brace for the innermost block,
1971 # run checks here.
1972 if not self.SeenOpenBrace():
1973 self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
1974
1975 # Update access control if we are inside a class/struct
1976 if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.comfd5da632013-10-25 17:39:45 +00001977 classinfo = self.stack[-1]
1978 access_match = Match(
1979 r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
1980 r':(?:[^:]|$)',
1981 line)
erg@google.comd350fe52013-01-14 17:51:48 +00001982 if access_match:
erg@google.comfd5da632013-10-25 17:39:45 +00001983 classinfo.access = access_match.group(2)
1984
1985 # Check that access keywords are indented +1 space. Skip this
erg@google.comc6671232013-10-25 21:44:03 +00001986 # check if the keywords are not preceded by whitespaces.
erg@google.comfd5da632013-10-25 17:39:45 +00001987 indent = access_match.group(1)
1988 if (len(indent) != classinfo.class_indent + 1 and
1989 Match(r'^\s*$', indent)):
1990 if classinfo.is_struct:
1991 parent = 'struct ' + classinfo.name
1992 else:
1993 parent = 'class ' + classinfo.name
1994 slots = ''
1995 if access_match.group(3):
1996 slots = access_match.group(3)
1997 error(filename, linenum, 'whitespace/indent', 3,
1998 '%s%s: should be indented +1 space inside %s' % (
1999 access_match.group(2), slots, parent))
erg@google.comd350fe52013-01-14 17:51:48 +00002000
2001 # Consume braces or semicolons from what's left of the line
2002 while True:
2003 # Match first brace, semicolon, or closed parenthesis.
2004 matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
2005 if not matched:
2006 break
2007
2008 token = matched.group(1)
2009 if token == '{':
2010 # If namespace or class hasn't seen a opening brace yet, mark
2011 # namespace/class head as complete. Push a new block onto the
2012 # stack otherwise.
2013 if not self.SeenOpenBrace():
2014 self.stack[-1].seen_open_brace = True
2015 else:
2016 self.stack.append(_BlockInfo(True))
2017 if _MATCH_ASM.match(line):
2018 self.stack[-1].inline_asm = _BLOCK_ASM
2019 elif token == ';' or token == ')':
2020 # If we haven't seen an opening brace yet, but we already saw
2021 # a semicolon, this is probably a forward declaration. Pop
2022 # the stack for these.
2023 #
2024 # Similarly, if we haven't seen an opening brace yet, but we
2025 # already saw a closing parenthesis, then these are probably
2026 # function arguments with extra "class" or "struct" keywords.
2027 # Also pop these stack for these.
2028 if not self.SeenOpenBrace():
2029 self.stack.pop()
2030 else: # token == '}'
2031 # Perform end of block checks and pop the stack.
2032 if self.stack:
2033 self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
2034 self.stack.pop()
2035 line = matched.group(2)
2036
2037 def InnermostClass(self):
2038 """Get class info on the top of the stack.
2039
2040 Returns:
2041 A _ClassInfo object if we are inside a class, or None otherwise.
2042 """
2043 for i in range(len(self.stack), 0, -1):
2044 classinfo = self.stack[i - 1]
2045 if isinstance(classinfo, _ClassInfo):
2046 return classinfo
2047 return None
2048
erg@google.com2aa59982013-10-28 19:09:25 +00002049 def CheckCompletedBlocks(self, filename, error):
2050 """Checks that all classes and namespaces have been completely parsed.
erg@google.com4e00b9a2009-01-12 23:05:11 +00002051
2052 Call this when all lines in a file have been processed.
2053 Args:
2054 filename: The name of the current file.
2055 error: The function to call with any errors found.
2056 """
erg@google.comd350fe52013-01-14 17:51:48 +00002057 # Note: This test can result in false positives if #ifdef constructs
2058 # get in the way of brace matching. See the testBuildClass test in
2059 # cpplint_unittest.py for an example of this.
2060 for obj in self.stack:
2061 if isinstance(obj, _ClassInfo):
2062 error(filename, obj.starting_linenum, 'build/class', 5,
2063 'Failed to find complete declaration of class %s' %
2064 obj.name)
erg@google.com2aa59982013-10-28 19:09:25 +00002065 elif isinstance(obj, _NamespaceInfo):
2066 error(filename, obj.starting_linenum, 'build/namespaces', 5,
2067 'Failed to find complete declaration of namespace %s' %
2068 obj.name)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002069
2070
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  gcc-2 accepts several constructs which are not standard C++. Warning
  about them in lint is one way to ease the transition to new compilers:
  - storage class not placed first (write "static const", not
    "const static").
  - "%qd" in printf-type functions (use "%lld" instead).
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  Because it is convenient to do so here, this function also checks for
  const string& members and for single-argument constructors that are not
  marked explicit.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported; it is called as
           error(filename, linenum, category, confidence, message).
  """

  # Comments are removed from this version of the line, strings are not.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Drop escaped backslashes so they are not mistaken for the start of an
  # undefined escape sequence.
  without_double_backslash = with_strings.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslash):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Everything below works on the line with both comments and strings
  # removed.
  elided = clean_lines.elided[linenum]

  # Each entry: (matcher, pattern, category, confidence, message). The
  # checks are independent and are reported in the order listed.
  #
  # About the last entry (const string& members):
  # TODO(unknown): Could it be expanded safely to arbitrary references,
  # without triggering too many false positives? The first
  # attempt triggered 5 warnings for mostly benign code in the regtest, hence
  # the restriction.
  # Here's the original regexp, for the reference:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  simple_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard. Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid. Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
      )
  for matcher, pattern, category, confidence, message in simple_checks:
    if matcher(pattern, elided):
      error(filename, linenum, category, confidence, message)

  # Everything else in this function operates on class declarations.
  # Stop here if we are not inside a class, or if the class head is not
  # completed yet.
  classinfo = nesting_state.InnermostClass()
  if not classinfo:
    return
  if not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  escaped_classname = re.escape(classinfo.name.split('::')[-1])

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  constructor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
                      % escaped_classname,
                      elided)
  if not constructor:
    return
  sole_argument = constructor.group(1)
  if sole_argument == 'void':
    return
  # A constructor whose argument is a reference to the class itself is
  # exempt.
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
           % escaped_classname, sole_argument.strip()):
    return
  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2176
erg@google.com4e00b9a2009-01-12 23:05:11 +00002177
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often occur inside if/for/while/switch expressions,
  # which have their own, more liberal conventions. When the line contains
  # such a construct, only the text inside its parentheses is examined;
  # otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  call_text = line
  for control_pattern in control_patterns:
    condition = Search(control_pattern, line)
    if condition:
      call_text = condition.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )"). We make an exception
  # for nested parens ( (a+b) + c ). Likewise, there should never be
  # a space before a ( when it's a function argument. I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.

  # Ignore control structures.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            call_text):
    return
  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', call_text):
    return
  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', call_text):
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', call_text):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', call_text):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  space_before_paren = (
      Search(r'\w\s+\(', call_text) and
      not Search(r'#\s*define|typedef', call_text) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', call_text))
  if space_before_paren:
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if not Search(r'[^)]\s+\)\s*[^{\s]', call_text):
    return
  # If the closing parenthesis is preceded by only whitespaces,
  # try to give a more descriptive error message.
  if Search(r'^\s+\)', call_text):
    error(filename, linenum, 'whitespace/parens', 2,
          'Closing ) should be moved to the previous line')
  else:
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002244
2245
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or is made up solely of
  whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2259
2260
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until we see either the end of a
    # declaration / trivial body or the opening brace of a real body.
    # A plain counter is used (rather than xrange/range) so the scan neither
    # depends on a Python-2-only builtin nor allocates a list.
    num_lines = clean_lines.NumLines()
    start_linenum = linenum
    while start_linenum < num_lines:
      start_line = lines[start_linenum]
      start_linenum += 1
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2329
2330
# Matches a "//" comment that starts with TODO.  Groups:
#   1: whitespace between "//" and "TODO" (possibly empty)
#   2: the "(username)" part, or None when absent
#   3: the single whitespace character (or '' at end of comment) following
#      the optional colon; None when some other character follows.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Comments that do not start with "//" followed by optional whitespace and
  "TODO" are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  match = _RE_PATTERN_TODO.match(comment)
  if match:
    # One whitespace is correct; zero whitespace is handled elsewhere.
    leading_whitespace = match.group(1)
    if len(leading_whitespace) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    username = match.group(2)
    if not username:
      error(filename, linenum, 'readability/todo', 2,
            'Missing username in TODO; it should look like '
            '"// TODO(my_username): Stuff."')

    # Group 3 is ' ' for a proper separator, '' when the comment ends right
    # after the TODO, and None (or another whitespace character such as a
    # tab) otherwise -- only the first two are acceptable.
    middle_whitespace = match.group(3)
    # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
    if middle_whitespace != ' ' and middle_whitespace != '':
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')
2362
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Reports an error when a DISALLOW_COPY_AND_ASSIGN,
  DISALLOW_EVIL_CONSTRUCTORS or DISALLOW_IMPLICIT_CONSTRUCTORS macro starts
  a line while the innermost enclosing block is a class whose current access
  section is not "private".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return
  if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
    if nesting_state.stack[-1].access != 'private':
      error(filename, linenum, 'readability/constructors', 3,
            '%s must be in the private: section' % matched.group(1))
  # Otherwise we found a DISALLOW* macro outside a class declaration, or
  # perhaps it was used inside a function when it should have been part of
  # the class declaration.  We could issue a warning here, but it probably
  # resulted in a compiler error already, so nothing is reported.
2392
2393
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Find the corresponding > to close a template.

  Scans forward from init_suffix, continuing onto following elided lines,
  tracking nesting of <, ( and [.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if the initial < looks like a template opening bracket: either the
    matching > was found, a comma was seen directly inside the angle
    brackets, or all remaining lines were exhausted without a verdict.
    False if some other operator (one of ")", "]" or ";") shows up directly
    inside the angle brackets first.
  """
  line = init_suffix
  nesting_stack = ['<']
  while True:
    # Find the next operator that can tell us whether < is used as an
    # opening bracket or as a less-than operator.  We only want to
    # warn on the latter case.
    #
    # We could also check all other operators and terminate the search
    # early, e.g. if we got something like this "a<b+c", the "<" is
    # most likely a less-than operator, but then we will get false
    # positives for default arguments and other template expressions.
    match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
    if match:
      # Found an operator, update nesting stack
      operator = match.group(1)
      line = match.group(2)

      if nesting_stack[-1] == '<':
        # Expecting closing angle bracket
        if operator in ('<', '(', '['):
          nesting_stack.append(operator)
        elif operator == '>':
          nesting_stack.pop()
          if not nesting_stack:
            # Found matching angle bracket
            return True
        elif operator == ',':
          # Got a comma after a bracket, this is most likely a template
          # argument.  We have not seen a closing angle bracket yet, but
          # it's probably a few lines later if we look for it, so just
          # return early here.
          return True
        else:
          # Got some other operator.
          return False

      else:
        # Expecting closing parenthesis or closing bracket
        if operator in ('<', '(', '['):
          nesting_stack.append(operator)
        elif operator in (')', ']'):
          # We don't bother checking for matching () or [].  If we got
          # something like (] or [), it would have been a syntax error.
          nesting_stack.pop()

    else:
      # Scan the next line
      linenum += 1
      if linenum >= len(clean_lines.elided):
        break
      line = clean_lines.elided[linenum]

  # Exhausted all remaining lines and still no matching angle bracket.
  # Most likely the input was incomplete, otherwise we should have
  # seen a semicolon and returned early.
  return True
2461
2462
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Find the corresponding < that started a template.

  Scans backward from init_prefix, continuing onto preceding elided lines,
  tracking nesting of >, ) and ].

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if the initial > looks like a template closing bracket: either the
    matching < was found, or a comma was seen directly inside the angle
    brackets.  False if some other operator (one of "(", "[" or ";") shows
    up directly inside the angle brackets first, or if all earlier lines
    were exhausted without finding a match.
  """
  line = init_prefix
  nesting_stack = ['>']
  while True:
    # Find the previous operator
    match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
    if match:
      # Found an operator, update nesting stack
      operator = match.group(2)
      line = match.group(1)

      if nesting_stack[-1] == '>':
        # Expecting opening angle bracket
        if operator in ('>', ')', ']'):
          nesting_stack.append(operator)
        elif operator == '<':
          nesting_stack.pop()
          if not nesting_stack:
            # Found matching angle bracket
            return True
        elif operator == ',':
          # Got a comma before a bracket, this is most likely a
          # template argument.  The opening angle bracket is probably
          # there if we look for it, so just return early here.
          return True
        else:
          # Got some other operator.
          return False

      else:
        # Expecting opening parenthesis or opening bracket
        if operator in ('>', ')', ']'):
          nesting_stack.append(operator)
        elif operator in ('(', '['):
          nesting_stack.pop()

    else:
      # Scan the previous line
      linenum -= 1
      if linenum < 0:
        break
      line = clean_lines.elided[linenum]

  # Exhausted all earlier lines and still no matching angle bracket.
  return False
2518
2519
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The 5-character prefix must be exactly four spaces plus the colon
        # that opens the initializer list.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or C++ style Doxygen comments placed after the variable:
        # ///<  Header comment
        # //!<  Header comment
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^!< ', line[commentend:]) or
                 Search(r'^/< ', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement is
  # a raw string so the backslash reaches re.sub() unchanged without relying
  # on an invalid string escape; the result still reads "operator\(".
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  #
  # This does not apply when the non-space character following the
  # comma is another comma, since the only time when that happens is
  # for empty macro arguments.
  #
  # We run this check in two passes: first pass on elided lines to
  # verify that lines contain missing whitespaces, second pass on raw
  # lines to confirm that those missing whitespaces are not due to
  # elided comments.
  if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  match = Match(r'^(.*[^ ({]){', line)
  if match:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<]".
    #
    # To account for nested initializer list, we allow any number of
    # closing braces up to "{;,)<".  We can't simply silence the
    # warning on first sight of closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    trailing_text = ''
    if endpos > -1:
      trailing_text = endline[endpos:]
    # Append at most the two lines that follow the closing brace.
    for offset in range(endlinenum + 1,
                        min(endlinenum + 3, clean_lines.NumLines() - 1)):
      trailing_text += clean_lines.elided[offset]
    if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002863
2864
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is blank line before protected/private.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.starting_linenum <= 24 or
      linenum <= class_info.starting_linenum):
    return

  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if matched:
    # Issue warning if the line before public/protected/private was
    # not a blank line, but don't do this if the previous line contains
    # "class" or "struct".  This can happen two ways:
    #  - We are at the beginning of the class.
    #  - We are forward-declaring an inner class that is semantically
    #    private, but needed to be public for implementation reasons.
    # Also ignores cases where the previous line ends with a backslash as can be
    # common when defining classes in C macros.
    prev_line = clean_lines.lines[linenum - 1]
    if (not IsBlankLine(prev_line) and
        not Search(r'\b(class|struct)\b', prev_line) and
        not Search(r'\\$', prev_line)):
      # Try a bit harder to find the beginning of the class.  This is to
      # account for multi-line base-specifier lists, e.g.:
      #   class Derived
      #       : public Base {
      end_class_head = class_info.starting_linenum
      for i in range(class_info.starting_linenum, linenum):
        if Search(r'\{\s*$', clean_lines.lines[i]):
          end_class_head = i
          break
      # Only complain when the access specifier is not on the line right
      # after the one that ends the class head.
      if end_class_head < linenum - 1:
        error(filename, linenum, 'whitespace/blank_line', 3,
              '"%s:" should be preceded by a blank line' % matched.group(1))
2918
2919
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.

  Blankness is judged on the elided lines (clean_lines.elided).

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """

  prevlinenum = linenum - 1
  # Walk backwards one line at a time; this normally stops after a step or
  # two, so no index list is built up front.
  while prevlinenum >= 0:
    prevline = clean_lines.elided[prevlinenum]
    if not IsBlankLine(prevline):     # if not a blank line...
      return (prevline, prevlinenum)
    prevlinenum -= 1
  return ('', -1)
2941
2942
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  The checks performed on the line are, in order:
    * an opening brace alone on its own line,
    * an "else" that starts a line right after a closing brace,
    * an "else" with braces on only one side,
    * an else clause or do/while body on the same line as the keyword,
    * a redundant semicolon after the closing brace of a block.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone is using
    # braces in a block to explicitly create a new scope, which is commonly used
    # to control the lifetime of stack-allocated variables.  Braces are also
    # used for brace initializers inside function calls.  We don't detect this
    # perfectly: we just don't complain if the last non-whitespace character on
    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
    # previous line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{(]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):  # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:  # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:  # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Block bodies should not be followed by a semicolon.  Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist.  These are the places where "};" should
  # be replaced by just "}":
  # 1. Some flavor of block following closing parenthesis:
  #    for (;;) {};
  #    while (...) {};
  #    switch (...) {};
  #    Function(...) {};
  #    if (...) {};
  #    if (...) else if (...) {};
  #
  # 2. else block:
  #    if (...) else {};
  #
  # 3. const member function:
  #    Function(...) const {};
  #
  # 4. Block following some statement:
  #    x = 42;
  #    {};
  #
  # 5. Block at the beginning of a function:
  #    Function(...) {
  #      {};
  #    }
  #
  #    Note that naively checking for the preceding "{" will also match
  #    braces inside multi-dimensional arrays, but this is fine since
  #    that expression will not contain semicolons.
  #
  # 6. Block following another block:
  #    while (true) {}
  #    {};
  #
  # 7. End of namespaces:
  #    namespace {};
  #
  #    These semicolons seems far more common than other kinds of
  #    redundant semicolons, possibly due to people converting classes
  #    to namespaces.  For now we do not warn for this case.
  #
  # Try matching case 1 first.
  match = Match(r'^(.*\)\s*)\{', line)
  if match:
    # Matched closing parenthesis (case 1).  Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro.  This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement.  This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals.
    closing_brace_pos = match.group(1).rfind(')')
    opening_parenthesis = ReverseCloseExpression(
        clean_lines, linenum, closing_brace_pos)
    if opening_parenthesis[2] > -1:
      # Text that precedes the opening parenthesis on its own line.
      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
      macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
      # Suppress the check for unknown macros and for "= (...) {" compound
      # literals.  The tuple below must stay in sync with the list of
      # safe-to-warn macros documented above (TEST_P was previously missing).
      if ((macro and
           macro.group(1) not in (
               'TEST', 'TEST_F', 'TEST_P', 'MATCHER', 'MATCHER_P',
               'TYPED_TEST',
               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
          Search(r'\s+=\s*$', line_prefix)):
        match = None

  else:
    # Try matching cases 2-3.
    match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not match:
      # Try matching cases 4-6.  These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        match = Match(r'^(\s*)\{', line)

  # Check matching closing brace
  if match:
    # group(1) is everything before the opening brace, so its length is the
    # column of that brace.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
      # Current {} pair is eligible for semicolon check, and we have found
      # the redundant semicolon, output warning here.
      #
      # Note: because we are scanning forward for opening braces, and
      # outputting warnings for the matching closing brace, if there are
      # nested blocks with trailing semicolons, we will get the error
      # messages in reversed order.
      error(filename, endlinenum, 'readability/braces', 4,
            "You don't need a ; after a }")
erg@google.com4e00b9a2009-01-12 23:05:11 +00003115
3116
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flag a for/while/if whose body is nothing but a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # The keyword may only be preceded by whitespace.  As a side effect most
  # do-while loops are ignored, because their "while" line should start with
  # a closing brace.  "if" is checked too, since an empty conditional block
  # is likely an error.
  keyword_match = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword_match:
    return

  # Find where the parenthesized condition ends (possibly on a later line).
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Only a semicolon immediately after the condition is reported.  Whitespace
  # or a newline before the semicolon is left to the separate check for
  # semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword_match.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003150
3151
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  When a macro from _CHECK_MACROS wraps a single relational comparison in
  which at least one operand is a literal constant, suggests the dedicated
  comparison macro from _CHECK_REPLACEMENT instead.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested
  lines = clean_lines.elided
  check_macro = None
  start_pos = -1
  for macro in _CHECK_MACROS:
    i = lines[linenum].find(macro)
    if i >= 0:
      check_macro = macro

      # Find opening parenthesis.  Do a regular expression match here
      # to make sure that we are matching the expected CHECK macro, as
      # opposed to some other macro that happens to contain the CHECK
      # substring.
      matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum])
      if not matched:
        continue
      start_pos = len(matched.group(1))
      break
  if not check_macro or start_pos < 0:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return
  if linenum == end_line:
    expression = lines[linenum][start_pos + 1:end_pos - 1]
  else:
    # Multi-line macro: rest of the first line, every whole line in between,
    # then the last line up to (not including) the closing parenthesis.
    # A slice is used instead of xrange() so this also runs on Python 3.
    expression = lines[linenum][start_pos + 1:]
    expression += ''.join(lines[linenum + 1:end_line])
    expression += last_line[0:end_pos - 1]

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    # Longer operators are listed before their prefixes (e.g. "<<=" before
    # "<<") so that every alternative is actually reachable.
    matched = Match(r'^\s*(<<=|<<|>>=|>>|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if matched:
      token = matched.group(1)
      if token == '(':
        # Parenthesized operand
        expression = matched.group(2)
        (end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
        if end < 0:
          return  # Unmatched parenthesis
        lhs += '(' + expression[0:end]
        expression = expression[end:]
      elif token in ('&&', '||'):
        # Logical and/or operators.  This means the expression
        # contains more than one term, for example:
        #   CHECK(42 < a && a < b);
        #
        # These are not replaceable with CHECK_LE, so bail out early.
        return
      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
        # Non-relational operator
        lhs += token
        expression = matched.group(2)
      else:
        # Relational operator
        operator = token
        rhs = matched.group(2)
        break
    else:
      # Unparenthesized operand.  Instead of appending to lhs one character
      # at a time, we do another regular expression match to consume several
      # characters at once if possible.  Trivial benchmark shows that this
      # is more efficient when the operands are longer than a single
      # character, which is generally the case.
      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not matched:
        # Fall back to consuming a single non-space character.
        matched = Match(r'^(\s*\S)(.*)$', expression)
        if not matched:
          break
      lhs += matched.group(1)
      expression = matched.group(2)

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and operator and rhs):
    return

  # Check that rhs do not contain logical operators.  We already know
  # that lhs is fine since the loop above parses out && and ||.
  if rhs.find('&&') > -1 or rhs.find('||') > -1:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][operator],
              check_macro, operator))
erg@google.com4e00b9a2009-01-12 23:05:11 +00003277
3278
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Report alternative operator keywords found on a line.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN on the line is reported
  together with the replacement listed in _ALT_TOKEN_REPLACEMENT.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are never checked.
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments: skip any line holding a
  # comment delimiter.  This does not help when the comment started before
  # and ends after the current line, but it catches most false positives and
  # gives people who put multi-line comments in preprocessor macros a way to
  # work around this warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for token_match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = token_match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3309
3310
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Strings that are not Unicode
    text (Python 2 byte strings, Python 3 bytes) are measured with len().
  """
  # Python 2 has a dedicated `unicode` type.  On Python 3 that name does not
  # exist and `str` is the Unicode text type, so fall back to it instead of
  # failing with a NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that base character + combining mark sequences that
  # have a precomposed form are counted as a single character.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters occupy no column of their own.
      width += 1
  return width
3331
3332
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # "elided" lines cannot be used here because commented lines have to be
  # checked as well, and "raw" lines would make us look inside C++11 raw
  # strings, so the raw-string-free variant is used.
  line = clean_lines.lines_without_raw_strings[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  # Number of space characters (not tabs) that start the line.
  initial_spaces = len(line) - len(line.lstrip(' '))
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section labels
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith(('#ifndef %s' % cppvar,
                                       '#define %s' % cppvar,
                                       '#endif // %s' % cppvar))

  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  exempt_from_length_check = (
      line.startswith('#include') or is_header_guard or
      Match(r'^\s*//.*http(s?)://\S*$', line) or
      Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not exempt_from_length_check:
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one statement on a line.  for loops are allowed two ;'s.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous_text = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # A for loop may run over two lines: the previous line then mentions
    # "for" but has not reached its first semicolon yet.
    continues_for_loop = 'for' in previous_text and ';' not in previous_text
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_loop and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003438
3439
# Matches an #include of a quoted .h file whose path contains no '/', i.e. a
# header named without its directory (for example: #include "bar.h").
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter (< or ")
# and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3448
3449
3450def _DropCommonSuffixes(filename):
3451 """Drops common suffixes like _test.cc or -inl.h from filename.
3452
3453 For example:
3454 >>> _DropCommonSuffixes('foo/foo-inl.h')
3455 'foo/foo'
3456 >>> _DropCommonSuffixes('foo/bar/foo.cc')
3457 'foo/bar/foo'
3458 >>> _DropCommonSuffixes('foo/foo_internal.h')
3459 'foo/foo'
3460 >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
3461 'foo/foo_unusualinternal'
3462
3463 Args:
3464 filename: The input filename.
3465
3466 Returns:
3467 The filename with the common suffix removed.
3468 """
3469 for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
3470 'inl.h', 'impl.h', 'internal.h'):
3471 if (filename.endswith(suffix) and len(filename) > len(suffix) and
3472 filename[-len(suffix) - 1] in ('-', '_')):
3473 return filename[:-len(suffix) - 1]
3474 return os.path.splitext(filename)[0]
3475
3476
3477def _IsTestFilename(filename):
3478 """Determines if the given filename has a suffix that identifies it as a test.
3479
3480 Args:
3481 filename: The input filename.
3482
3483 Returns:
3484 True if 'filename' looks like a test, False otherwise.
3485 """
3486 if (filename.endswith('_test.cc') or
3487 filename.endswith('_unittest.cc') or
3488 filename.endswith('_regtest.cc')):
3489 return True
3490 else:
3491 return False
3492
3493
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which category of header 'include' belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers; membership in _CPP_HEADERS
  # separates the C++ ones from the C ones.
  if is_system:
    if include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  # A sibling "public" directory of the target is accepted as well.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir == target_dir or include_dir == public_dir:
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_first = _RE_FIRST_COMPONENT.match(target_base)
  include_first = _RE_FIRST_COMPONENT.match(include_base)
  if (target_first and include_first and
      target_first.group(0) == include_first.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3551
3552
erg@google.coma87abb82009-02-24 01:41:01 +00003553
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below needs a parsed #include.  _RE_PATTERN_INCLUDE is
  # anchored with ^, so one search serves both the ordering checks and the
  # stream check.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  is_system = (include_match.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    header_type = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(header_type)
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    in_order = include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include)
    if not in_order:
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so we exempt them.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3624
erg@google.com8a95ecc2011-09-08 00:45:54 +00003625
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the text
  following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly handles
  nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match string having an open punctuation symbol at the end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
          It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.
  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found,
    or if a closing punctuation appears that does not match the innermost
    opening one.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # dict.values() is available on both Python 2 and Python 3, unlike
  # itervalues(), and the dict is tiny so the extra list on Python 2 is free.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    if text[position] == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif text[position] in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif text[position] in matching_punctuation:
      punctuation_stack.append(matching_punctuation[text[position]])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # Punctuations match. position is one past the final closing punctuation,
  # so drop that last character from the returned slice.
  return text[start_position:position - 1]
3680
3681
# Regular-expression building blocks for recognizing call-by-reference
# parameters.
#
# Template argument lists nested up to 2 levels deep are recognized by the
# following (admittedly messy) sub-pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = ''.join([
    # Optional leading qualifier and elaborated-type keyword.
    r'(?:const\s+)?',
    r'(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?',
    # One or more of: word character, template argument list, scope operator.
    r'(?:',
    r'\w',
    r'|\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>',
    r'|::',
    r')+',
])
# A call-by-reference parameter ends with '& identifier'. Group 1 captures
# the type together with the identifier; an optional default value and the
# terminating ',' or ')' follow the group.
_RE_PATTERN_REF_PARAM = re.compile(''.join([
    r'(',
    _RE_PATTERN_TYPE,
    r'(?:\s*(?:\bconst\b|[*]))*',
    r'\s*&\s*',
    _RE_PATTERN_IDENT,
    r')',
    r'\s*(?:=[^,()]+)?',
    r'[,)]',
]))
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = ''.join([
    r'(?:',
    r'.*\s*\bconst\s*&\s*', _RE_PATTERN_IDENT,
    r'|',
    r'const\s+', _RE_PATTERN_TYPE, r'\s*&\s*', _RE_PATTERN_IDENT,
    r')',
])
3706
3707
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Each check looks at the comment/string-elided text of a single line and
  reports problems through the error callback. #include lines are handed off
  to CheckIncludeLine and receive no further checks here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed. It is not
                   consulted by the checks in this function.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives. This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', line)
  if match:
    matched_new = match.group(1)
    matched_type = match.group(2)
    matched_funcptr = match.group(3)

    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc. Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    #
    # std::function<> wrapper has a similar problem.
    #
    # Return types for function pointers also look like casts if they
    # don't have an extra space.
    if (matched_new is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Search(r'\bMockCallback<.*>', line) or
             Search(r'\bstd::function<.*>', line)) and
        not (matched_funcptr and
             Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                   matched_funcptr))):
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines. The missing MOCK_METHOD is usually one or two
      # lines back, so scan back one or two lines.
      #
      # It's not possible for gmock macros to appear in the first 2
      # lines, since the class head + section name takes up 2 lines.
      if (linenum < 2 or
          not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                     clean_lines.elided[linenum - 1]) or
               Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                     clean_lines.elided[linenum - 2]))):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style. '
              'Use static_cast<%s>(...) instead' %
              matched_type)

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast. This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  match = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
  if match and match.group(1) != '*':
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  #
  # Also ignore things that look like operators. These are matched separately
  # because operator names cross non-word boundaries. If we change the pattern
  # above, we would decrease the accuracy of matching identifiers.
  if (match and
      not Search(r'\boperator\W', line) and
      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented here:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types. The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # Stay quiet only when the second argument is, in its entirety, a literal:
  # an elided character literal (''), a decimal integer or a hex integer.
  # The alternatives are grouped so that the anchors apply to all of them;
  # otherwise any argument that merely starts with a digit (e.g. "4 * n")
  # would be mistaken for a literal.
  if match and not Match(r"^(?:''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error. Both shift operators are delimiters.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class. The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays. It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files. Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4007
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive. If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in range(startline, linenum + 1))

  # Check for non-const references in function parameters. A single '&' may
  # found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  scope_types = (_ClassInfo, _NamespaceInfo)
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif isinstance(nesting_state.stack[-1], scope_types):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], scope_types)):
      check_params = True  # just opened global/class/namespace block
  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>". Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template argument after swap is a run of word characters
  # and colons, so that both swap<Foo>( and swap<ns::Foo>( are recognized.
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line. Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list. Try a bit harder to catch this case.
    for i in range(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
erg@google.comc6671232013-10-25 21:44:03 +00004119
erg@google.com4e00b9a2009-01-12 23:05:11 +00004120
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Checks for a C-style cast by looking for the pattern.

  Besides real casts, the pattern also matches a lone unnamed function
  parameter such as "Function(int);". Those are reported under
  'readability/function' instead of 'readability/casting', or not at all
  when they appear in contexts where unnamed parameters are customary.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend. This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts. Its first
      group is expected to start right after the opening parenthesis of
      the cast, as it does for the patterns passed by CheckLanguage.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # Everything on the line that precedes the "(" of the suspected cast.
  before_cast = line[0:match.start(1) - 1]

  # Exclude lines with sizeof, since sizeof looks like a cast.
  if Match(r'.*sizeof\s*$', before_cast):
    return False

  # operator++(int) and operator--(int)
  if before_cast.endswith((' operator++', ' operator--')):
    return False

  # A single unnamed argument for a function tends to look like old
  # style cast. If we see those, don't issue warnings for deprecated
  # casts, instead issue warnings for unnamed arguments where
  # appropriate.
  #
  # These are things that we want warnings for, since the style guide
  # explicitly require all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort, where the compiler would be fine
  # if they had named parameters, but people often omit those
  # identifiers to reduce clutter:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[match.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
    # Looks like an unnamed parameter.

    # Don't warn on any kind of template arguments.
    if Match(r'^\s*>', remainder):
      return False

    # Don't warn on assignments to function pointers, but keep warnings for
    # unnamed parameters to pure virtual functions. Note that this pattern
    # will also pass on assignments of "0" to function pointers, but the
    # preferred values for those would be "nullptr" or "NULL".
    #
    # Any amount of whitespace (including none) may precede the "=", in
    # agreement with the guard above.
    matched_zero = Match(r'^\s*=\s*(\S+)\s*;', remainder)
    if matched_zero and matched_zero.group(1) != '0':
      return False

    # Don't warn on function pointer declarations. For this we need
    # to check what came before the "(type)" string.
    if Match(r'.*\)\s*$', line[0:match.start(0)]):
      return False

    # Don't warn if the parameter is named with block comments, e.g.:
    #  Function(int /*unused_param*/);
    if '/*' in raw_line:
      return False

    # Passed all filters, issue warning here.
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
    return True

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, match.group(1)))

  return True
4213
erg@google.com4e00b9a2009-01-12 23:05:11 +00004214
# Maps each standard (or gcc-extension) header to the template names that it
# provides. Each entry is (header, (template_name, ...)).
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# One (compiled pattern, function name, header) triple per <algorithm>
# function we look for. The pattern is meant to match max<type>(..., ...)
# and max(..., ...), but not foo->max, foo.max or type::max().
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# One (compiled pattern, 'name<>', header) triple per template listed in
# _HEADERS_CONTAINING_TEMPLATES, in table order.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
4271
4272
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a header are part of one module.

  A 'module' is understood as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  When filename_cc carries a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h', the leading
  part of the .cc path that the header path lacks is returned as well. The
  caller uses it to open the header file more robustly, since the real
  include paths are not known in this context.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  # Only a '.cc' / '.h' pair can form a module.
  if not (filename_cc.endswith('.cc') and filename_h.endswith('.h')):
    return (False, '')

  # Reduce the source name to its module stem: drop the extension, then at
  # most one test suffix (the longer suffix is tried first).
  source_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break

  # Same for the header: drop the extension and an optional '-inl' marker.
  header_stem = filename_h[:-len('.h')]
  if header_stem.endswith('-inl'):
    header_stem = header_stem[:-len('-inl')]

  # 'public' and 'internal' directories do not separate modules.
  for visibility_dir in ('/public/', '/internal/'):
    source_stem = source_stem.replace(visibility_dir, '/')
    header_stem = header_stem.replace(visibility_dir, '/')

  if not source_stem.endswith(header_stem):
    return False, ''
  # Whatever precedes the shared tail is the prefix the header path lacks.
  return True, source_stem[:-len(header_stem)]
4326
4327
def UpdateIncludeState(filename, include_state, io=codecs):
    """Fill up the include_state with new includes found from the file.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are
                     inserted.
      io: The io factory to use to read the file. Provided for testability.

    Returns:
      True if the file could be opened (its includes, if any, have then been
      recorded in include_state). False if opening it raised IOError.
    """
    try:
        headerfile = io.open(filename, 'r', 'utf8', 'replace')
    except IOError:
        return False

    try:
        for linenum, line in enumerate(headerfile, 1):
            clean_line = CleanseComments(line)
            match = _RE_PATTERN_INCLUDE.search(clean_line)
            if match:
                include = match.group(2)
                # The value formatting is cute, but not really used right now.
                # What matters here is that the key is in include_state.
                include_state.setdefault(include,
                                         '%s:%d' % (filename, linenum))
    finally:
        # Release the handle instead of leaving it to garbage collection.
        # An injected io factory may hand back a plain iterable of lines
        # with no close() method, so only call it when it exists.
        close = getattr(headerfile, 'close', None)
        if close is not None:
            close()
    return True
4355
4356
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
    """Reports for missing stl includes.

    This function will output warnings to make sure you are including the
    headers necessary for the stl containers and functions that you use. We
    only give one reason to include a header. For example, if you use both
    equal_to<> and less<> in a .h file, only one (the latter in the file) of
    these will be reported as a reason to include the <functional>.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
      io: The IO factory to use to read the header file. Provided for unittest
          injection.
    """
    # Maps a header name to (line number, entity that needs the header).
    # Example: {'<functional>': (1219, 'less<>')}. Later lines overwrite
    # earlier ones, so only the last use of each header is remembered.
    required = {}

    for linenum in xrange(clean_lines.NumLines()):
        line = clean_lines.elided[linenum]
        if not line or line[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        string_match = _RE_PATTERN_STRING.search(line)
        if string_match:
            # Don't warn about strings in non-STL namespaces:
            # (We check only the first match per line; good enough.)
            before = line[:string_match.start()]
            if before.endswith('std::') or not before.endswith('::'):
                required['<string>'] = (linenum, 'string')

        for pattern, template, header in _re_pattern_algorithm_header:
            if pattern.search(line):
                required[header] = (linenum, template)

        # Every template pattern ends in '<', so lines without one cannot
        # match. The guard is purely a speed-up.
        if '<' in line:
            for pattern, template, header in _re_pattern_templates:
                if pattern.search(line):
                    required[header] = (linenum, template)

    # The policy is that if you #include something in foo.h you don't need to
    # include it again in foo.cc. Here, we will look at possible includes.
    # Work on a copy so the caller's include_state is left untouched.
    include_state = include_state.copy()

    # Use the absolute path so that matching works properly.
    #
    # For Emacs's flymake: if cpplint is invoked from flymake, a temporary
    # file is generated whose name might end with '_flymake.cc'. Restore the
    # original name so that the corresponding header can be found, e.g. for
    # 'foo_flymake.cc' we should search for 'foo.h', not 'foo_flymake.h'.
    abs_filename = re.sub(r'_flymake\.cc$', '.cc',
                          FileInfo(filename).FullName())

    # Did we find the header for this file (if any) and successfully load it?
    header_found = False

    # include_state is modified during iteration, so iterate over a snapshot
    # of its keys.
    for header in list(include_state.keys()):
        same_module, common_path = FilesBelongToSameModule(abs_filename,
                                                           header)
        fullpath = common_path + header
        if same_module and UpdateIncludeState(fullpath, include_state, io):
            header_found = True

    # If we can't find the header file for a .cc, assume it's because we don't
    # know where to look. In that case we'll give up as we're not sure they
    # didn't include it in the .h file.
    # TODO(unknown): Do a better job of finding .h files so we are confident
    # that not having the .h file means there isn't one.
    if filename.endswith('.cc') and not header_found:
        return

    # All the lines have been processed, report the errors found.
    for required_header, (linenum, template) in required.items():
        if required_header.strip('<>"') not in include_state:
            error(filename, linenum,
                  'build/include_what_you_use', 4,
                  'Add #include ' + required_header + ' for ' + template)
4448
4449
# Matches make_pair followed (after optional whitespace) by an explicit
# template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
    """Check that make_pair's template arguments are deduced.

    G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
    specified explicitly, and such use isn't intended in any case.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    elided_line = clean_lines.elided[linenum]
    if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(elided_line):
        return
    error(filename, linenum, 'build/explicit_make_pair',
          4,  # 4 = high confidence
          'For C++11-compatibility, omit template arguments from make_pair'
          ' OR use pair directly OR if appropriate, construct a pair directly')
erg@google.com8a95ecc2011-09-08 00:45:54 +00004472
4473
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
    """Processes a single line in the file.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: A CleansedLines instance for the file; its raw_lines
                   attribute is read here and the instance is passed on to
                   every check.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are
                     inserted.
      function_state: A _FunctionState instance which counts function lines,
                      etc.
      nesting_state: A _NestingState instance which maintains information
                     about the current stack of nested blocks being parsed.
      error: A callable to which errors are reported. Checks in this file
             call it as error(filename, line number, category, confidence,
             message).
      extra_check_functions: An array of additional check functions that will
                             be run on each source line. Each function takes
                             4 arguments: filename, clean_lines, line, error.
                             The default list is only iterated, never
                             modified.
    """
    ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line,
                            error)
    nesting_state.Update(filename, clean_lines, line, error)

    # Once the nesting state has been updated, lines whose innermost block
    # is flagged as inline assembly get no further checks.
    if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
        return

    CheckForFunctionLengths(filename, clean_lines, line, function_state,
                            error)
    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
    CheckStyle(filename, clean_lines, line, file_extension, nesting_state,
               error)
    CheckLanguage(filename, clean_lines, line, file_extension,
                  include_state, nesting_state, error)
    CheckForNonConstReference(filename, clean_lines, line, nesting_state,
                              error)
    CheckForNonStandardConstructs(filename, clean_lines, line,
                                  nesting_state, error)

    # The remaining built-in checks share the signature of the caller
    # supplied extras, so they are run the same way, built-ins first.
    builtin_line_checks = (CheckVlogArguments,
                           CheckPosixThreading,
                           CheckInvalidIncrement,
                           CheckMakePairUsesDeduction)
    for check_fn in builtin_line_checks:
        check_fn(filename, clean_lines, line, error)
    for check_fn in extra_check_functions:
        check_fn(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00004514
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=[]):
    """Performs lint checks and reports any errors to the given error function.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with
             the last element being empty if the file is terminated with a
             newline.
      error: A callable to which errors are reported. Checks in this file
             call it as error(filename, line number, category, confidence,
             message).
      extra_check_functions: An array of additional check functions that will
                             be run on each source line. Each function takes
                             4 arguments: filename, clean_lines, line, error
    """
    # Sentinel comment lines are added at both ends. This builds a new list,
    # so the caller's list object is not modified.
    first_marker = '// marker so line numbers and indices both start at 1'
    last_marker = '// marker so line numbers end in a known way'
    lines = [first_marker] + lines + [last_marker]

    include_state = _IncludeState()
    function_state = _FunctionState()
    nesting_state = _NestingState()

    ResetNolintSuppressions()

    # Whole-file checks that run on the unprocessed lines.
    CheckForCopyright(filename, lines, error)
    if file_extension == 'h':
        CheckForHeaderGuard(filename, lines, error)

    RemoveMultiLineComments(filename, lines, error)
    clean_lines = CleansedLines(lines)

    # Per-line checks.
    for line in xrange(clean_lines.NumLines()):
        ProcessLine(filename, file_extension, clean_lines, line,
                    include_state, function_state, nesting_state, error,
                    extra_check_functions)
    nesting_state.CheckCompletedBlocks(filename, error)

    CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

    # We check here rather than inside ProcessLine so that we see raw
    # lines rather than "cleaned" lines.
    CheckForBadCharacters(filename, lines, error)

    CheckForNewlineAtEOF(filename, lines, error)
4559
def ProcessFile(filename, vlevel, extra_check_functions=[]):
    """Does google-lint on a single file.

    Args:
      filename: The name of the file to parse.

      vlevel: The level of errors to report. Every error of confidence
              >= verbose_level will be reported. 0 is a good default.

      extra_check_functions: An array of additional check functions that will
                             be run on each source line. Each function takes
                             4 arguments: filename, clean_lines, line, error
    """

    _SetVerboseLevel(vlevel)

    try:
        # Support the UNIX convention of using "-" for stdin. Note that
        # we are not opening the file with universal newline support
        # (which codecs doesn't support anyway), so the resulting lines do
        # contain trailing '\r' characters if we are reading a file that
        # has CRLF endings.
        if filename == '-':
            lines = codecs.StreamReaderWriter(sys.stdin,
                                              codecs.getreader('utf8'),
                                              codecs.getwriter('utf8'),
                                              'replace').read().split('\n')
        else:
            source_file = codecs.open(filename, 'r', 'utf8', 'replace')
            try:
                lines = source_file.read().split('\n')
            finally:
                # Close explicitly rather than relying on garbage collection.
                source_file.close()
    except IOError:
        sys.stderr.write(
            "Skipping input '%s': Can't open for reading\n" % filename)
        return

    # Remove trailing '\r'. If it is not expected to be present (i.e.
    # os.linesep != '\r\n' as in Windows), a warning is issued below if this
    # file is processed.
    carriage_return_found = False
    for linenum, text in enumerate(lines):
        if text.endswith('\r'):
            lines[linenum] = text.rstrip('\r')
            carriage_return_found = True

    # Note, if no dot is found, this will give the entire filename as the ext.
    file_extension = filename[filename.rfind('.') + 1:]

    # When reading from stdin, the extension is unknown, so no cpplint tests
    # should rely on the extension.
    valid_extensions = ['cc', 'h', 'cpp', 'cu', 'cuh']
    if filename != '-' and file_extension not in valid_extensions:
        sys.stderr.write('Ignoring %s; not a valid file name '
                         '(.cc, .h, .cpp, .cu, .cuh)\n' % filename)
    else:
        ProcessFileData(filename, file_extension, lines, Error,
                        extra_check_functions)
        if carriage_return_found and os.linesep != '\r\n':
            # Use 0 for linenum since outputting only one error for
            # potentially several lines.
            Error(filename, 0, 'whitespace/newline', 1,
                  'One or more unexpected \\r (^M) found; '
                  'better to use only a \\n')

    # Written for every file that could be read, including ignored ones.
    sys.stderr.write('Done processing %s\n' % filename)
4627
4628
def PrintUsage(message):
    """Prints a brief usage string and exits, optionally with an error message.

    This function never returns: it always ends in sys.exit().

    Args:
      message: The optional error message. If it is empty or None, the exit
               status is 1; otherwise the message is handed to sys.exit()
               prefixed with 'FATAL ERROR: '.
    """
    sys.stderr.write(_USAGE)
    if not message:
        sys.exit(1)
    sys.exit('\nFATAL ERROR: ' + message)
4640
4641
def PrintCategories():
    """Prints a list of all the error-categories used by error messages.

    These are the categories used to filter messages via --filter.
    The listing goes to stderr, one category per line, and the process then
    exits with status 0.
    """
    listing = ''.join([' %s\n' % category for category in _ERROR_CATEGORIES])
    sys.stderr.write(listing)
    sys.exit(0)
4649
4650
def ParseArguments(args):
    """Parses the command line arguments.

    This may set the output format and verbosity level as side-effects. It
    also rebinds the module globals _root (--root) and _line_length
    (--linelength) when those options are given. Any invalid option or value
    ends the program through PrintUsage().

    Args:
      args: The command line arguments:

    Returns:
      The list of filenames to lint.
    """
    global _root
    global _line_length

    try:
        (opts, filenames) = getopt.getopt(args, '', ['help', 'output=',
                                                     'verbose=',
                                                     'counting=',
                                                     'filter=',
                                                     'root=',
                                                     'linelength='])
    except getopt.GetoptError:
        PrintUsage('Invalid arguments.')

    # Start from the currently configured values so that options which are
    # not given leave the existing settings in place.
    verbosity = _VerboseLevel()
    output_format = _OutputFormat()
    filters = ''
    counting_style = ''

    for (opt, val) in opts:
        if opt == '--help':
            PrintUsage(None)
        elif opt == '--output':
            if val not in ('emacs', 'vs7', 'eclipse'):
                PrintUsage('The only allowed output formats are emacs, '
                           'vs7 and eclipse.')
            output_format = val
        elif opt == '--verbose':
            # Report a non-numeric level as a usage error, in the same way
            # as --linelength below, instead of letting ValueError escape.
            try:
                verbosity = int(val)
            except ValueError:
                PrintUsage('Verbosity level must be an integer.')
        elif opt == '--filter':
            filters = val
            # An empty --filter= means "list the categories and exit".
            if not filters:
                PrintCategories()
        elif opt == '--counting':
            if val not in ('total', 'toplevel', 'detailed'):
                PrintUsage('Valid counting options are total, toplevel, '
                           'and detailed')
            counting_style = val
        elif opt == '--root':
            _root = val
        elif opt == '--linelength':
            try:
                _line_length = int(val)
            except ValueError:
                PrintUsage('Line length must be digits.')

    if not filenames:
        PrintUsage('No files were specified.')

    _SetOutputFormat(output_format)
    _SetVerboseLevel(verbosity)
    _SetFilters(filters)
    _SetCountingStyle(counting_style)

    return filenames
4712
4713
def main():
    """Lints every file named on the command line, then exits.

    The exit status is truthy when at least one error was counted.
    """
    filenames = ParseArguments(sys.argv[1:])

    # Change stderr to write with replacement characters so we don't die
    # if we try to print something containing non-ASCII characters.
    utf8_reader = codecs.getreader('utf8')
    utf8_writer = codecs.getwriter('utf8')
    sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader,
                                           utf8_writer, 'replace')

    _cpplint_state.ResetErrorCounts()
    for filename in filenames:
        ProcessFile(filename, _cpplint_state.verbose_level)
    _cpplint_state.PrintErrorCounts()

    found_errors = _cpplint_state.error_count > 0
    sys.exit(found_errors)


if __name__ == '__main__':
    main()