blob: d9b3e6b54637de889b19525a846fcd6c5669202d [file] [log] [blame]
erg@google.com4e00b9a2009-01-12 23:05:11 +00001#!/usr/bin/python2.4
2#
erg@google.com5210aec2011-09-06 20:19:05 +00003# Copyright (c) 2011 Google Inc. All rights reserved.
4# Copyright (c) 2009 Torch Mobile Inc.
erg@google.com4e00b9a2009-01-12 23:05:11 +00005#
erg@google.com969161c2009-06-26 22:06:46 +00006# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00009#
erg@google.com969161c2009-06-26 22:06:46 +000010# * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12# * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16# * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000019#
erg@google.com969161c2009-06-26 22:06:46 +000020# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000031
32# Here are some issues that I've had people identify in my code during reviews,
33# that I think are possible to flag automatically in a lint tool. If these were
34# caught by lint, it would save time both for myself and that of my reviewers.
35# Most likely, some of these are beyond the scope of the current lint framework,
36# but I think it is valuable to retain these wish-list items even if they cannot
37# be immediately implemented.
38#
39# Suggestions
40# -----------
41# - Check for no 'explicit' for multi-arg ctor
42# - Check for boolean assign RHS in parens
43# - Check for ctor initializer-list colon position and spacing
44# - Check that if there's a ctor, there should be a dtor
45# - Check accessors that return non-pointer member variables are
46# declared const
47# - Check accessors that return non-const pointer member vars are
48# *not* declared const
49# - Check for using public includes for testing
50# - Check for spaces between brackets in one-line inline method
51# - Check for no assert()
52# - Check for spaces surrounding operators
53# - Check for 0 in pointer context (should be NULL)
54# - Check for 0 in char context (should be '\0')
55# - Check for camel-case method name conventions for methods
56# that are not simple inline getters and setters
57# - Check that base classes have virtual destructors
58# put " // namespace" after } that closes a namespace, with
59# namespace's name after 'namespace' if it is named.
60# - Do not indent namespace contents
61# - Avoid inlining non-trivial constructors in header files
62# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
63# - Check for old-school (void) cast for call-sites of functions
64# ignored return value
65# - Check gUnit usage of anonymous namespace
66# - Check for class declaration order (typedefs, consts, enums,
67# ctor(s?), dtor, friend declarations, methods, member vars)
68#
69
70"""Does google-lint on c++ files.
71
72The goal of this script is to identify places in the code that *may*
73be in non-compliance with google style. It does not attempt to fix
74up these problems -- the point is to educate. It does also not
75attempt to find all problems, or to ensure that everything it does
76find is legitimately a problem.
77
78In particular, we can get very confused by /* and // inside strings!
79We do a small hack, which is to ignore //'s with "'s after them on the
80same line, but it is far from perfect (in either direction).
81"""
82
83import codecs
84import getopt
85import math # for log
86import os
87import re
88import sre_compile
89import string
90import sys
91import unicodedata
92
93
# Command-line usage text for the tool.
# NOTE(review): the in-string indentation was reconstructed from a view of the
# file in which leading whitespace was lost -- confirm against the canonical
# copy before relying on exact spacing.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing. Visual Studio
      compatible output (vs7) may also be used. Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".) Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.
"""
144
# We categorize each error message we print. Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here! cpplint_unittest.py should tell you if you forget to do this.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = [
    'build/class',
    'build/deprecated',
    'build/endif_comment',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/function',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/nolint',
    'readability/streams',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/rtti',
    'runtime/sizeof',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/virtual',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/declaration',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/indent',
    'whitespace/labels',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
    ]
erg@google.com4e00b9a2009-01-12 23:05:11 +0000210
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags). All entries here should start with a '-' or '+', as in
# the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.come35f7652009-06-19 20:52:09 +0000216
# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
220
# Headers that we consider STL headers.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
    'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
230
231
# Non-STL C++ system headers.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
248
249
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> suggested replacement macro}.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Macros asserting that a condition holds map each operator to the
# comparison macro with the same meaning.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Macros asserting that a condition does NOT hold map each operator to the
# macro for the inverse comparison.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
281
282
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5
290
291
# Cache of compiled regular expressions, keyed by pattern string.
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
_error_suppressions = {}
300
def ParseNolintSuppressions(filename, raw_line, linenum, error):
    """Updates the global list of error-suppressions.

    Parses any NOLINT comments on the current line, updating the global
    error_suppressions store. Reports an error if the NOLINT comment
    names a category that is not in _ERROR_CATEGORIES.

    Args:
      filename: str, the name of the input file.
      raw_line: str, the line of input text, with comments.
      linenum: int, the number of the current line.
      error: function, an error handler.
    """
    # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
    m = _RE_SUPPRESSION.search(raw_line)
    if not m:
        return

    category = m.group(1)
    if category in (None, '(*)'):  # => "suppress all"
        # The None key holds lines on which every category is suppressed.
        _error_suppressions.setdefault(None, set()).add(linenum)
        return

    if category.startswith('(') and category.endswith(')'):
        category = category[1:-1]  # strip the surrounding parentheses
        if category in _ERROR_CATEGORIES:
            _error_suppressions.setdefault(category, set()).add(linenum)
        else:
            error(filename, linenum, 'readability/nolint', 5,
                  'Unknown NOLINT error category: %s' % category)
328
329
def ResetNolintSuppressions():
    """Resets the set of NOLINT suppressions to empty."""
    _error_suppressions.clear()
333
334
def IsErrorSuppressedByNolint(category, linenum):
    """Returns true if the specified error category is suppressed on this line.

    Consults the global error_suppressions map populated by
    ParseNolintSuppressions/ResetNolintSuppressions.

    Args:
      category: str, the category of the error.
      linenum: int, the current line number.
    Returns:
      bool, True iff the error should be suppressed due to a NOLINT comment.
    """
    # The None key records lines carrying a blanket NOLINT / NOLINT(*).
    return (linenum in _error_suppressions.get(category, set()) or
            linenum in _error_suppressions.get(None, set()))
erg@google.com4e00b9a2009-01-12 23:05:11 +0000349
def Match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: str, the regular expression source.
      s: str, the text to match; matching is anchored at the start of s.

    Returns:
      The match object, or None if the pattern does not match.
    """
    # The regexp compilation caching is inlined in both Match and Search for
    # performance reasons; factoring it out into a separate function turns out
    # to be noticeably expensive.
    if pattern not in _regexp_compile_cache:
        # Use the public re.compile rather than the private sre_compile module.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)
358
359
def Search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: str, the regular expression source.
      s: str, the text to scan; the pattern may match anywhere in s.

    Returns:
      The match object for the first match, or None if there is none.
    """
    if pattern not in _regexp_compile_cache:
        # Use the public re.compile rather than the private sre_compile module.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
365
366
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which includes appear.

    As a dict, an _IncludeState object serves as a mapping between include
    filename and line number on which that file was included.

    Call CheckNextIncludeOrder() once for each header in the file, passing
    in the type constants defined above. A call made in an illegal order
    returns a non-empty error message describing the problem.
    """
    # self._section will move monotonically through this set. If it ever
    # needs to move backwards, CheckNextIncludeOrder will return an error
    # message.
    _INITIAL_SECTION = 0
    _MY_H_SECTION = 1
    _C_SECTION = 2
    _CPP_SECTION = 3
    _OTHER_H_SECTION = 4

    _TYPE_NAMES = {
        _C_SYS_HEADER: 'C system header',
        _CPP_SYS_HEADER: 'C++ system header',
        _LIKELY_MY_HEADER: 'header this file implements',
        _POSSIBLE_MY_HEADER: 'header this file may implement',
        _OTHER_HEADER: 'other header',
        }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing. (This can't be an error.)",
        _MY_H_SECTION: 'a header this file implements',
        _C_SECTION: 'C system header',
        _CPP_SECTION: 'C++ system header',
        _OTHER_H_SECTION: 'other header',
        }

    def __init__(self):
        dict.__init__(self)
        # The name of the current section.
        self._section = self._INITIAL_SECTION
        # The path of last found header.
        self._last_header = ''

    def CanonicalizeAlphabeticalOrder(self, header_path):
        """Returns a path canonicalized for alphabetical comparison.

        - replaces "-" with "_" so they both cmp the same.
        - removes '-inl' since we don't require them to be after the main
          header.
        - lowercase everything, just in case.

        Args:
          header_path: Path to be canonicalized.

        Returns:
          Canonicalized path.
        """
        return header_path.replace('-inl.h', '.h').replace('-', '_').lower()

    def IsInAlphabeticalOrder(self, header_path):
        """Check if a header is in alphabetical order with the previous header.

        When the header is in order, it is remembered as the new "previous
        header" for the next call.

        Args:
          header_path: Header to be checked.

        Returns:
          Returns true if the header is in alphabetical order.
        """
        canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
        if self._last_header > canonical_header:
            return False
        self._last_header = canonical_header
        return True

    def CheckNextIncludeOrder(self, header_type):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        # Built up front, while self._section still names the section we
        # were in before this header was seen.
        error_message = ('Found %s after %s' %
                         (self._TYPE_NAMES[header_type],
                          self._SECTION_NAMES[self._section]))

        last_section = self._section

        if header_type == _C_SYS_HEADER:
            if self._section <= self._C_SECTION:
                self._section = self._C_SECTION
            else:
                self._last_header = ''
                return error_message
        elif header_type == _CPP_SYS_HEADER:
            if self._section <= self._CPP_SECTION:
                self._section = self._CPP_SECTION
            else:
                self._last_header = ''
                return error_message
        elif header_type == _LIKELY_MY_HEADER:
            if self._section <= self._MY_H_SECTION:
                self._section = self._MY_H_SECTION
            else:
                self._section = self._OTHER_H_SECTION
        elif header_type == _POSSIBLE_MY_HEADER:
            if self._section <= self._MY_H_SECTION:
                self._section = self._MY_H_SECTION
            else:
                # This will always be the fallback because we're not sure
                # enough that the header is associated with this file.
                self._section = self._OTHER_H_SECTION
        else:
            assert header_type == _OTHER_HEADER
            self._section = self._OTHER_H_SECTION

        # Alphabetical ordering is tracked per section, so forget the last
        # header whenever the section changes.
        if last_section != self._section:
            self._last_header = ''

        return ''
490
491
class _CppLintState(object):
    """Maintains module-wide state."""

    def __init__(self):
        self.verbose_level = 1  # global setting.
        self.error_count = 0    # global count of reported errors
        # filters to apply when emitting error messages
        self.filters = _DEFAULT_FILTERS[:]
        self.counting = 'total'  # In what way are we counting errors?
        self.errors_by_category = {}  # string to int dict storing error counts

        # output format:
        # "emacs" - format that emacs can parse (default)
        # "vs7" - format that Microsoft Visual Studio 7 can parse
        self.output_format = 'emacs'

    def SetOutputFormat(self, output_format):
        """Sets the output format for errors."""
        self.output_format = output_format

    def SetVerboseLevel(self, level):
        """Sets the module's verbosity, and returns the previous setting."""
        last_verbose_level = self.verbose_level
        self.verbose_level = level
        return last_verbose_level

    def SetCountingStyle(self, counting_style):
        """Sets the module's counting options."""
        self.counting = counting_style

    def SetFilters(self, filters):
        """Sets the error-message filters.

        These filters are applied when deciding whether to emit a given
        error message.

        Args:
          filters: A string of comma-separated filters
                   (eg "+whitespace/indent").
                   Each filter should start with + or -; else we die.

        Raises:
          ValueError: The comma-separated filters did not all start with
                      '+' or '-'.
                      E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
        """
        # Default filters always have less priority than the flag ones.
        self.filters = _DEFAULT_FILTERS[:]
        for filt in filters.split(','):
            clean_filt = filt.strip()
            if clean_filt:
                self.filters.append(clean_filt)
        for filt in self.filters:
            if not (filt.startswith('+') or filt.startswith('-')):
                raise ValueError('Every filter in --filters must start with + or -'
                                 ' (%s does not)' % filt)

    def ResetErrorCounts(self):
        """Sets the module's error statistic back to zero."""
        self.error_count = 0
        self.errors_by_category = {}

    def IncrementErrorCount(self, category):
        """Bumps the module's error statistic.

        Args:
          category: str, the category of the error being counted.
        """
        self.error_count += 1
        if self.counting in ('toplevel', 'detailed'):
            if self.counting != 'detailed':
                # 'toplevel' counts only by the part before the first slash.
                category = category.split('/')[0]
            if category not in self.errors_by_category:
                self.errors_by_category[category] = 0
            self.errors_by_category[category] += 1

    def PrintErrorCounts(self):
        """Print a summary of errors by category, and the total."""
        # items() rather than iteritems(): works on both Python 2 and 3.
        for category, count in self.errors_by_category.items():
            sys.stderr.write('Category \'%s\' errors found: %d\n' %
                             (category, count))
        sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000568
# The single module-wide state object consulted by the helpers below.
_cpplint_state = _CppLintState()
570
571
def _OutputFormat():
    """Gets the module's output format."""
    return _cpplint_state.output_format
575
576
def _SetOutputFormat(output_format):
    """Sets the module's output format."""
    _cpplint_state.SetOutputFormat(output_format)
580
581
def _VerboseLevel():
    """Returns the module's verbosity setting."""
    return _cpplint_state.verbose_level
585
586
def _SetVerboseLevel(level):
    """Sets the module's verbosity, and returns the previous setting."""
    return _cpplint_state.SetVerboseLevel(level)
590
591
def _SetCountingStyle(level):
    """Sets the module's counting options.

    Args:
      level: The counting style, forwarded unchanged to
             _CppLintState.SetCountingStyle().
    """
    _cpplint_state.SetCountingStyle(level)
595
596
def _Filters():
    """Returns the module's list of output filters, as a list."""
    return _cpplint_state.filters
600
601
def _SetFilters(filters):
    """Sets the module's error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.
    """
    _cpplint_state.SetFilters(filters)
613
614
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body."""

    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

    def __init__(self):
        self.in_a_function = False
        self.lines_in_function = 0
        self.current_function = ''

    def Begin(self, function_name):
        """Start analyzing function body.

        Args:
          function_name: The name of the function being tracked.
        """
        self.in_a_function = True
        self.lines_in_function = 0
        self.current_function = function_name

    def Count(self):
        """Count line in current function body."""
        if self.in_a_function:
            self.lines_in_function += 1

    def Check(self, error, filename, linenum):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          filename: The name of the current file.
          linenum: The number of the line to check.
        """
        # Functions whose name starts with TEST or Test get the larger limit.
        if Match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        # The limit doubles with each verbosity level.
        trigger = base_trigger * 2**_VerboseLevel()

        if self.lines_in_function > trigger:
            # Explicit floor division keeps the integer arithmetic the same
            # on Python 2 and Python 3.
            ratio = self.lines_in_function // base_trigger
            # ratio 1 => 0, 2 => 1, 4 => 2, 8 => 3, 16 => 4, 32 => 5, ...
            # capped at 5, the highest confidence level.
            error_level = min(int(math.log(ratio, 2)), 5)
            error(filename, linenum, 'readability/fn_size', error_level,
                  'Small and focused functions are preferred:'
                  ' %s has %d non-comment lines'
                  ' (error triggered by exceeding %d lines).' % (
                      self.current_function, self.lines_in_function, trigger))

    def End(self):
        """Stop analyzing function body."""
        self.in_a_function = False
669
670
class _IncludeError(Exception):
    """Indicates a problem with the include order in a file."""
674
675
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def FullName(self):
        """Make Windows paths like Unix.

        Returns:
          The absolute path of the file, with backslashes replaced by '/'.
        """
        return os.path.abspath(self._filename).replace('\\', '/')

    def RepositoryName(self):
        r"""FullName after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something
        smart: detecting the root of the checkout and truncating
        /path/to/checkout from the name so that we get header guards that
        don't include things like "C:\Documents and Settings\..." or
        "/home/username/..." in them and thus people on different computers
        who have checked the source out to different locations won't see
        bogus errors.

        Returns:
          The path relative to the detected checkout root, or the full name
          when the file does not exist or no checkout root is found.
        """
        fullname = self.FullName()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we
                # recursively look up the directory tree for the top of the
                # SVN checkout
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                while os.path.exists(os.path.join(one_up_dir, ".svn")):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                # + 1 also drops the '/' that follows the prefix.
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git or hg top level directory by
            # searching up from the current path.
            root_dir = os.path.dirname(fullname)
            # Stop at the filesystem root, where dirname() is a fixed point.
            while (root_dir != os.path.dirname(root_dir) and
                   not os.path.exists(os.path.join(root_dir, ".git")) and
                   not os.path.exists(os.path.join(root_dir, ".hg"))):
                root_dir = os.path.dirname(root_dir)

            if (os.path.exists(os.path.join(root_dir, ".git")) or
                    os.path.exists(os.path.join(root_dir, ".hg"))):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def Split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cc', Split() would
        return ('chrome/browser', 'browser', '.cc')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.RepositoryName()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def BaseName(self):
        """File base name - text after the final slash, before the final period."""
        return self.Split()[1]

    def Extension(self):
        """File extension - the final period and the text following it."""
        return self.Split()[2]

    def NoExtension(self):
        """Repository-relative path of the file with its extension removed."""
        return '/'.join(self.Split()[0:2])

    def IsSource(self):
        """File has a source file extension."""
        return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
762
763
def _ShouldPrintError(category, confidence, linenum):
    """Returns true iff confidence >= verbose, category passes
    filter and is not NOLINT-suppressed.

    Args:
      category: str, the category of the error.
      confidence: The confidence score of the error.
      linenum: int, the line the error was found on.
    """
    # There are three ways we might decide not to print an error message:
    # a "NOLINT(category)" comment appears in the source,
    # the verbosity level isn't high enough, or the filters filter it out.
    if IsErrorSuppressedByNolint(category, linenum):
        return False
    if confidence < _cpplint_state.verbose_level:
        return False

    # Filters are applied in order, so the last filter whose prefix matches
    # the category decides.
    is_filtered = False
    for one_filter in _Filters():
        if one_filter.startswith('-'):
            if category.startswith(one_filter[1:]):
                is_filtered = True
        elif one_filter.startswith('+'):
            if category.startswith(one_filter[1:]):
                is_filtered = False
        else:
            assert False  # should have been checked for in SetFilter.

    return not is_filtered
790
791
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Besides the location, the report carries a confidence score: how certain
  we are that this is a real style regression rather than a
  misidentification or a use that is sometimes justified.

  False positives can be suppressed with "NOLINT(category)" comments on
  the offending line.  These are parsed into _error_suppressions.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  # The 'vs7' output format puts the line number in parentheses; every
  # other format uses "file:line:".
  if _cpplint_state.output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
822
823
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' and one or more hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it
# also decides which surrounding spaces to remove, so that comments inside
# statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line.  Otherwise, we try to remove spaces from the right side;
# if this doesn't work we try on the left side, but only if there's a
# non-word character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
844
845
def IsCppString(line):
  """Tells whether the text right after 'line' would be inside a string.

  Comments (single-line or multi-line) are not taken into account.

  Args:
    line: A partial line of code, starting at column 0.

  Returns:
    True if the next character appended to 'line' would be inside a
    string constant.
  """
  # Neutralise escaped backslashes first so that \\" is not taken for \".
  line = line.replace(r'\\', 'XX')
  # Count double quotes that are neither escaped (\") nor the character
  # literal '"'; an odd count means a string is still open.
  open_quotes = line.count('"') - line.count(r'\"') - line.count("'\"'")
  return open_quotes % 2 == 1
861
862
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  A line qualifies when, ignoring surrounding whitespace, it starts with
  '/*' and has no '*/' after that opener.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first qualifying line, or len(lines) if none exists.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Comments that also close on this line are not multi-line.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
872
873
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the next line that closes a comment.

  Called while inside a comment.  A line qualifies when, ignoring
  surrounding whitespace, it ends with '*/'.

  Args:
    lines: The list of lines to search.
    lineix: The index at which to start searching.

  Returns:
    The index of the first qualifying line, or len(lines) if none exists.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
881
882
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with a placeholder comment.

  Args:
    lines: The list of lines to modify.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps the lines non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
889
890
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out multi-line (C-style) comments in 'lines', in place.

  Args:
    filename: The name of the current file.
    lines: The list of lines to modify.
    error: The function to call with any errors found.  It is called when
      a comment is opened but no closing line is found.
  """
  search_from = 0
  while search_from < len(lines):
    first = FindNextMultiLineCommentStart(lines, search_from)
    if first >= len(lines):
      break  # no further multi-line comments
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    search_from = last + 1
905
906
def CleanseComments(line):
  """Strips //-comments and single-line C-style /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  marker = line.find('//')
  if marker >= 0:
    # A '//' that sits inside a string literal does not start a comment.
    if not IsCppString(line[:marker]):
      line = line[:marker]
  # Now get rid of /* ... */ comments.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
921
922
class CleansedLines(object):
  """Keeps three parallel views of a file's lines.

  1) elided: lines with strings collapsed and comments removed,
  2) lines: lines with comments removed, and
  3) raw_lines: the lines exactly as passed in.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw in lines:
      self.lines.append(CleanseComments(raw))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    Lines matched by _RE_PATTERN_INCLUDE are returned unchanged.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Escaped characters go first so that collapsing quotes stays simple;
    # things that look like escapes shouldn't occur outside strings and
    # chars.  Character literals are collapsed next, then string literals.
    for pattern, replacement in (
        (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
        (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
        (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""')):
      elided = pattern.sub(replacement, elided)
    return elided
966
967
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.  Matching
  is done on the elided lines, so strings and comments are ignored, and
  only brackets of the same kind as the opening one are counted.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointing *past* the closing bracket, where
    line is the elided line at the returned linenum.  If the character at
    pos is not an opening bracket, or no close is found before the end of
    the file, returns (line, clean_lines.NumLines(), -1), where line is the
    last elided line that was examined.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  num_lines = clean_lines.NumLines()
  depth = 0
  # Scan forward from the opening bracket itself, so that brackets earlier
  # on the first line cannot unbalance the count.
  start = pos
  while True:
    for index in range(start, len(line)):
      char = line[index]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          return (line, linenum, index + 1)
    linenum += 1
    if linenum >= num_lines:
      # Ran off the end of the file without finding the close.
      return (line, num_lines, -1)
    line = clean_lines.elided[linenum]
    start = 0
1005
1006
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10.  Index 0 is skipped because there
  # is a dummy line at the front.
  for candidate in lines[1:11]:
    if re.search(r'Copyright', candidate, re.I):
      return
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1018
1019
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is the file's repository name, upper-cased, with every
  '-', '.', '/' and whitespace character replaced by '_', followed by a
  trailing '_'.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.
  """
  # Restore the original filename in case cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)

  repository_name = FileInfo(filename).RepositoryName()
  sanitized = re.sub(r'[-./\s]', '_', repository_name)
  return sanitized.upper() + '_'
1038
1039
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef/#define header guard pair is present.
  Otherwise checks that the guard name and the final #endif comment use
  the expected CPP variable.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef, ifndef_linenum = argument, linenum
      if directive == '#define' and not define:
        define = argument
    # The last #endif wins; the whole line is kept.
    if line.startswith('#endif'):
      endif, endif_linenum = line, linenum

  if not (ifndef and define and ifndef == define):
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility: that variant is reported at level 0.
  if ifndef != cppvar:
    if ifndef == cppvar + '_':
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if endif != ('#endif // %s' % cppvar):
    if endif == ('#endif // %s' % (cppvar + '_')):
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1101
1102
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Logs an error for each line containing Unicode replacement characters.

  These indicate that either the file contained invalid UTF-8 (likely)
  or Unicode replacement characters (which it shouldn't).  Note that
  it's possible for this to throw off line numbering if the invalid
  UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, line in enumerate(lines):
    if u'\ufffd' not in line:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1120
1121
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1138
1139
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line.  Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary.  We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes are harmless, but the second backslash of a pair
  # could be misread as escaping a following quote, so drop them up front.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = line.count('/*')
  comment_closes = line.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1176
1177
# Pairs of (thread-unsafe call prefix, suggested reentrant replacement).
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading.  Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added.  These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of each function name on the line is examined, so a
  real call is still found when an earlier occurrence is only the tail of
  a longer identifier (e.g. "my_rand(a) + rand()").  At most one error is
  reported per function per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # An occurrence preceded by an identifier character, '.' or '>' is
      # either part of a longer name or a member call, not the libc call.
      if ix == 0 or not (line[ix - 1].isalnum() or
                         line[ix - 1] in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1220
1221
# Matches a statement of the form "*count++;" or "*count--;", which moves
# the pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_INVALID_INCREMENT.match(clean_lines.elided[linenum]):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1248
1249
# Matches "Foo *foo" / "Foo &foo" at the start of a line: a word (other
# than return or delete), whitespace, then '*' or '&' attached to a name.
_RE_PATTERN_POINTER_DECLARATION_WHITESPACE = re.compile(
    r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+')


def CheckPointerDeclarationWhitespace(filename, clean_lines, linenum, error):
  """Checks for Foo *foo declarations.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  found = _RE_PATTERN_POINTER_DECLARATION_WHITESPACE.match(
      clean_lines.elided[linenum])
  if not found:
    return
  operator = found.group('pointer_operator')
  declaration = found.group(0).strip()
  error(filename, linenum, 'whitespace/declaration', 3,
        'Declaration has space between type name and %s in %s' %
        (operator, declaration))
1269
1270
erg@google.com4e00b9a2009-01-12 23:05:11 +00001271class _ClassInfo(object):
1272 """Stores information about a class."""
1273
1274 def __init__(self, name, linenum):
1275 self.name = name
1276 self.linenum = linenum
1277 self.seen_open_brace = False
1278 self.is_derived = False
1279 self.virtual_method_linenumber = None
1280 self.has_virtual_destructor = False
1281 self.brace_depth = 0
1282
1283
1284class _ClassState(object):
1285 """Holds the current state of the parse relating to class declarations.
1286
1287 It maintains a stack of _ClassInfos representing the parser's guess
1288 as to the current nesting of class declarations. The innermost class
1289 is at the top (back) of the stack. Typically, the stack will either
1290 be empty or have exactly one entry.
1291 """
1292
1293 def __init__(self):
1294 self.classinfo_stack = []
1295
1296 def CheckFinished(self, filename, error):
1297 """Checks that all classes have been completely parsed.
1298
1299 Call this when all lines in a file have been processed.
1300 Args:
1301 filename: The name of the current file.
1302 error: The function to call with any errors found.
1303 """
1304 if self.classinfo_stack:
1305 # Note: This test can result in false positives if #ifdef constructs
1306 # get in the way of brace matching. See the testBuildClass test in
1307 # cpplint_unittest.py for an example of this.
1308 error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
1309 'Failed to find complete declaration of class %s' %
1310 self.classinfo_stack[0].name)
1311
1312
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations and
  reference members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported.  It is called with
           filename, line number, category, confidence level and message.
  """

  # Start from the line with comments removed but strings still in place;
  # the format-string checks need the string contents.
  text = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', text):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', text):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Escaped backslashes are removed before looking for undefined escapes.
  text = text.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', text):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Everything below works on the line with comments and strings removed.
  text = clean_lines.elided[linenum]

  misplaced_storage_class = (
      r'\b(const|volatile|void|char|short|int|long'
      r'|float|double|signed|unsigned'
      r'|schar|u?int8|u?int16|u?int32|u?int64)'
      r'\s+(auto|register|static|extern|typedef)\b')
  if Search(misplaced_storage_class, text):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', text):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', text):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            text):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', text):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  stack = class_state.classinfo_stack
  declaration = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', text)
  if declaration:
    stack.append(_ClassInfo(declaration.group(3), linenum))

  # The rest of the function only concerns the innermost open class.
  if not stack:
    return
  info = stack[-1]

  # Until the opening brace shows up, look for it and for parent class
  # declarations.
  if not info.seen_open_brace:
    # A ';' means a forward declaration or a single-line class
    # declaration, neither of which is processed.
    if ';' in text:
      stack.pop()
      return
    info.seen_open_brace = ('{' in text)
    # A bare ':' (not part of '::') introduces a base-class list.
    if Search('(^|[^:]):($|[^:])', text):
      info.is_derived = True
    if not info.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = info.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  constructor = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                      % re.escape(base_classname),
                      text)
  if constructor:
    parameter = constructor.group(1)
    # A lone 'void' and a (const) reference to the class itself are exempt.
    if (parameter != 'void' and
        not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                  parameter.strip())):
      error(filename, linenum, 'runtime/explicit', 5,
            'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', text):
    info.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, text):
      info.has_virtual_destructor = True

  # Look for class end.
  depth = info.brace_depth + text.count('{') - text.count('}')
  if depth > 0:
    info.brace_depth = depth
    return

  finished = stack.pop()
  # Try to detect missing virtual destructor declarations.
  # For now, only warn if a non-derived class with virtual methods lacks
  # a virtual destructor. This is to make it less likely that people will
  # declare derived virtual destructors without declaring the base
  # destructor virtual.
  if (finished.virtual_method_linenumber is not None and
      not finished.has_virtual_destructor and
      not finished.is_derived):  # Only warn for base classes
    error(filename, finished.linenum, 'runtime/virtual', 4,
          'The class %s probably needs a virtual destructor due to '
          'having virtual method(s), one declared at line %d.'
          % (finished.name, finished.virtual_method_linenumber))
1476
1477
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Since function calls often occur inside if/for/while/switch
  # expressions - which have their own, more liberal conventions - we
  # first see if we should be looking inside such an expression for a
  # function call, to which we can apply more strict standards.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line  # if there's no control flow construct, look at whole line
  for pattern in control_patterns:
    found = Search(pattern, line)
    if found:
      fncall = found.group(1)  # look inside the parens for function calls
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.
  exemptions = (
      # Control structures.
      r'\b(if|for|while|switch|return|delete)\b',
      # Pointers/references to functions.
      r' \([^)]+\)\([^)]*(\)|,$)',
      # Pointers/references to arrays.
      r' \([^)]+\)\[[^\]]+\]')
  for exempt in exemptions:
    if Search(exempt, fncall):
      return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  if (Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
1536
1537
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when
  every character in it is whitespace.

  Args:
    line: The text of a single line.

  Returns:
    True if the line is blank, False otherwise.
  """
  if not line:
    return True
  return line.isspace()
1551
1552
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles exactly one line and does one of three things: start
  tracking a new function (function_state.Begin), finish the current one
  (function_state.Check followed by function_state.End), or count the line
  towards the current body (function_state.Count).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the candidate header until something settles
    # whether this is a declaration, a trivial body, or a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                           # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):  # Handle TEST... macros
          # Keep the macro arguments so that each test gets its own name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:  # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1621
1622
# Recognizes a comment that opens with TODO.  Group 1 is the whitespace
# between '//' and 'TODO', group 2 the optional '(username)' part, and
# group 3 the single whitespace character (or end of string) that follows
# the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Lints the formatting of a TODO comment.

  Comments that do not start with a TODO marker are ignored.  For the
  others, up to three independent problems are reported: more than one
  space before TODO, a missing "(username)", and a missing space after
  the TODO marker.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, space_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # space_after is None when the marker runs straight into other text;
  # a single space and the empty string (end of comment) are both fine.
  if space_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1654
1655
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't have too many
  blank lines in a row.

  The blank-line and comment checks look at the raw line; every later
  check looks at the elided line (comments and strings removed).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}'
  if IsBlankLine(line):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are
      # indented 4 spaces (because they did not fit in a 80 column line when
      # placed on the same line as the function name).  We also check for the
      # case where the previous line is indented 6 spaces, which may happen
      # when the initializers of a constructor do not fit into a 80 column
      # line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum - 2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The five-character prefix must be exactly four spaces and a colon.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have
        # a closing paren, without the opening paren, followed by an opening
        # brace or colon (for initializer lists) we assume that it is the last
        # line of a function header.  If we have a colon indented 4 spaces, it
        # is an initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block. Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block. Is this needed?')

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement
  # is a raw string so that the backslash is spelled out explicitly instead
  # of relying on '\(' being an unrecognized string escape.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      # Group 2 is the padding after '(', group 3 the first character of
      # the condition and group 4 the padding before ')'.
      if not ((match.group(3) == ';' and
               len(match.group(2)) == 1 + len(match.group(4))) or
              (not match.group(2) and
               Search(r'\bfor\s*\(.*; \)', line))):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in (0, 1):
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, you should have spaces before your braces.
  # And since you should never have braces at the beginning of a line, this is
  # an easy test.
  if Search(r'[^ (]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1886
1887
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank line above the given line.

  The search runs over the elided lines (clean_lines.elided), walking
  upwards from the line just before linenum.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (text, index) tuple for the nearest preceding line that is not
    blank.  When every preceding line is blank, or there is no preceding
    line at all, the result is ('', -1).
  """
  for candidate in xrange(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if IsBlankLine(text):
      continue
    return (text, candidate)
  return ('', -1)
1909
1910
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Also covers brace-related layout around else and do, and a stray ';'
  after a closing brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # Stop once the start of the file is reached.  GetPreviousNonBlankLine
    # then keeps answering ('', -1); prepending '' leaves the line
    # unchanged, so without this test the loop would never terminate.
    if (prevlinenum >= 0 and
        Match(r'\s+{.*}\s*;', line) and not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1983
1984
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a more specific variant.

  For example, CHECK(a == b) can be written as CHECK_EQ, and likewise
  for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """

  # Decimal and hex integers, strings, and chars (in that order).
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # One side of the operator has to look like a literal, since
  # CHECK(x == iterator) won't compile.  This misses some cases where a
  # more specific CHECK is possible, but it's less annoying than dealing
  # with extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  found = Match(pattern, line)
  if not found:
    return found

  # Don't complain about CHECK(x == NULL) or similar because
  # CHECK_EQ(x, NULL) won't compile (requires a cast).
  # Also, don't complain about more complex boolean expressions
  # involving && or || such as CHECK(a == b || c == d).
  return not Search(r'NULL|&&|\|\|', line)
2018
2019
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests specific CHECK/EXPECT variants over plain comparisons.

  At most one suggestion is reported per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested; the
  # first macro from _CHECK_MACROS found in the raw line wins.
  raw_line = clean_lines.raw_lines[linenum]
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_line:
      current_macro = candidate
      break
  if not current_macro:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
  for operator in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(operator, current_macro, line):
      continue
    replacement = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              replacement, current_macro, operator))
    break
2050 break
2051
2052
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Text is normalized to NFC first.  Each character whose East Asian
  width is wide ('W') or fullwidth ('F') then counts as two columns,
  combining characters count as zero, and everything else counts as one.
  Input that is not a text string is measured with len().

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  # Resolve the text type at call time: interpreters that have no
  # 'unicode' builtin would otherwise fail with a NameError here, and on
  # those every str is already Unicode text.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  for c in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(c) in ('W', 'F'):
      width += 2
    elif not unicodedata.combining(c):
      width += 1
  return width
2073
2074
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Applies the 'C++ style rules' checks of cppguide.html to one line.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  This function itself checks for tabs, trailing
  whitespace, odd indentation, unindented labels, line length and
  several commands on one line; it then hands the line to CheckBraces,
  CheckSpacing and CheckCheck.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]
  cleansed_line = clean_lines.elided[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = len(line) - len(line.lstrip(' '))

  # Only the first of the following three problems is reported for a line.
  if line[-1:].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for labels
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  # Labels should always be indented at least one space.
  elif (initial_spaces == 0 and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor or '
          'the base class list in a class definition, the colon should '
          'be on the following line.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    for guard_prefix in ('#ifndef %s', '#define %s', '#endif // %s'):
      if line.startswith(guard_prefix % cppvar):
        is_header_guard = True
        break

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  exempt_from_length = (line.startswith('#include') or is_header_guard or
                        Match(r'^\s*//.*http(s?)://\S*$', line))
  if not exempt_from_length:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # for loops are allowed two ;'s (and may run over two lines).
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
2171
2172
# Matches an #include of a double-quoted .h file whose quoted text contains
# no '/' before the '.h', e.g. '#include "foo.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
# A name that starts with '-', '_' or '.' does not match at all.
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2181
2182
def _DropCommonSuffixes(filename):
  """Strips well-known endings such as _test.cc or -inl.h from a filename.

  A known ending only counts when it is attached with '-' or '_'.  If
  none applies, just the file extension is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
                 'inl.h', 'impl.h', 'internal.h'):
    for separator in ('-', '_'):
      ending = separator + suffix
      if filename.endswith(ending):
        return filename[:-len(ending)]
  return os.path.splitext(filename)[0]
2208
2209
def _IsTestFilename(filename):
  """Tells whether a filename ends in one of the test-file suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  for test_suffix in ('_test.cc', '_unittest.cc', '_regtest.cc'):
    if filename.endswith(test_suffix):
      return True
  return False
2225
2226
def _ClassifyInclude(fileinfo, include, is_system):
  """Works out which category of header an #include refers to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # A header counts as C++ when it is listed in either table.
  is_cpp_h = include in _STL_HEADERS or include in _CPP_HEADERS

  if is_system:
    if is_cpp_h:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  target_stem = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_stem)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    # A sibling 'public' directory is accepted as well.
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir == target_dir or include_dir == public_dir:
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2285
2286
erg@google.coma87abb82009-02-24 01:41:01 +00002287
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from the elided line, to make
  certain tasks easier.  However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  include_match = _RE_PATTERN_INCLUDE.search(line)
  if include_match:
    include = include_match.group(2)
    if include in include_state:
      # A file shouldn't be included more than once.  There are a handful
      # of instances where doing so is okay, but in general it's not.
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      include_state[include] = linenum
      is_system = include_match.group(1) == '<'

      # Headers are expected in this order:
      # 1) for foo.cc, foo.h (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h (deprecated location)
      # 5) other google headers
      #
      # Each include is classified as one of those types; include_state
      # remembers the highest type seen so far and returns a message when
      # a lower type shows up afterwards.
      order_problem = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if order_problem:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (order_problem, fileinfo.BaseName()))
      if not include_state.IsInAlphabeticalOrder(include):
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  anchored_match = _RE_PATTERN_INCLUDE.match(line)
  if (anchored_match and
      Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$',
            anchored_match.group(2)) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
2355
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  A line holding an #include is handed to CheckIncludeLine and receives none
  of the other checks.  Every other non-empty line is run through a series
  of independent regular-expression heuristics; each one that fires reports
  through 'error'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after.
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  all_refs = len(
      re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline))
  leading_const_refs = len(
      re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                 r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline))
  trailing_const_refs = len(
      re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                 fnline))
  if all_refs > leading_const_refs + trailing_const_refs:
    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line)):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style. '
            'Use static_cast<%s>(...) instead' %
            match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases.  Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>. If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast. Google doesn't support "
          'RTTI.')

  # The back-reference requires the very same identifier (ending in '_')
  # inside the parentheses, e.g. "foo_(foo_)".
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # A second argument that is, in its entirety, an (elided) character
  # literal or a decimal/hex number is a plausible fill value and is not
  # flagged.  The alternation is grouped and anchored at both ends so that
  # an expression which merely starts with a digit (e.g. "10 * n") is still
  # reported.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^(''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2622
2623
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching 'pattern' and reports it.

  This also handles sizeof(type) warnings, due to similarity of content.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.
  """
  cast_match = Search(pattern, line)
  if not cast_match:
    return

  # e.g., sizeof(int): the text in front of the parenthesis that opens the
  # match ends with 'sizeof'.
  text_before_paren = line[:cast_match.start(1) - 1]
  if Match(r'.*sizeof\s*$', text_before_paren):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  # What follows the match decides whether this is really an unnamed
  # function parameter rather than a cast:
  #   ')'  function pointers as arguments to a function,
  #        eg, void foo(void (*bar)(int));
  #   ';'  a more basic function check; also possibly a function pointer
  #        typedef.  eg, void foo(int); or void foo(int) const;
  #   '='  function pointer assignment.  eg, void *(*foo)(int) = ...
  #
  # Right now, this will only catch cases where there's a single argument,
  # and it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  text_after_cast = line[cast_match.end(0):]
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))',
                      text_after_cast)
  if declaration:
    terminator = declaration.group(3)
    if not terminator or terminator == ';' or '/*' not in raw_line:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast_match.group(1)))
2677
2678
# Each entry pairs a header with the template names that header provides.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Maps a template to headers that are trusted to supply it as well.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap'),
}

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, function name, header) triples for <algorithm>.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append((
      re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
      _template,
      '<algorithm>',
  ))

# (compiled pattern, 'name<>', header) triples, one per template listed in
# _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append((
        re.compile(r'(\<|\b)' + _template + r'\s*\<'),
        _template + '<>',
        _header,
    ))
2740
2741
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a header are part of one module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header.  This is used by the caller of this function to more robustly open
  the header file.  We don't have access to the real include paths in this
  context, so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation.  Because of this, this function gives
  some false positives.  This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not filename_cc.endswith('.cc') or not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the source path to its module name: drop the extension, then at
  # most one test suffix ('_unittest' is tried before '_test').
  module_cc = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if module_cc.endswith(test_suffix):
      module_cc = module_cc[:-len(test_suffix)]
      break
  module_cc = module_cc.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header: drop the extension and a '-inl' suffix.
  module_h = filename_h[:-len('.h')]
  if module_h.endswith('-inl'):
    module_h = module_h[:-len('-inl')]
  module_h = module_h.replace('/public/', '/').replace('/internal/', '/')

  # The files match when the header's module path is a trailing part of the
  # source's; whatever precedes it is the prefix needed to open the header.
  if module_cc.endswith(module_h):
    return (True, module_cc[:-len(module_h)])
  return (False, '')
2795
2796
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the header is stripped of comments and searched for an
  #include; each included name that is not yet a key of include_state is
  added.  Existing entries are left untouched.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned.  False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    linenum = 0
    for line in headerfile:
      linenum += 1
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle instead of leaving it to the garbage
    # collector.  An injected 'io' may hand back an object that has no
    # close() method, so only call it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
2824
2825
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports STL headers that are used but not included.

  This function will output warnings to make sure you are including the
  headers necessary for the stl containers and functions that you use.  We
  only give one reason to include a header.  For example, if you use both
  equal_to<> and less<> in a .h file, only one (the latter in the file) of
  these will be reported as a reason to include the <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file.  Provided for unittest
        injection.
  """
  # Maps a header name to (line number, entity that needs the header).
  # Example of required: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    string_match = _RE_PATTERN_STRING.search(line)
    if string_match:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      before_string = line[:string_match.start()]
      if before_string.endswith('std::') or not before_string.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Pure speed-up, no semantics are changed: every template pattern needs
    # a '<', so lines without one can skip the remaining searches.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc.  Here, we will look at possible includes.
  # Work on a copy of include_state so it is only modified within this
  # function.
  include_state = include_state.copy()

  # Use the absolute path so that matching works properly.
  #
  # For Emacs's flymake: a temporary file generated by flymake might end
  # with '_flymake.cc'.  Restore the original file name so that the
  # corresponding header can be found, e.g. for 'foo_flymake.cc' search for
  # 'foo.h' instead of 'foo_flymake.h'.
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', os.path.abspath(filename))

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # include_state is modified during iteration, so we iterate over a copy of
  # the keys.
  for header in include_state.keys():  #NOLINT
    same_module, common_path = FilesBelongToSameModule(abs_filename, header)
    header_path = common_path + header
    if same_module and UpdateIncludeState(header_path, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look.  In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident
  # that not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header, (required_linenum, template) in required.items():
    # Skip templates that an already included header is trusted to supply.
    supplied_elsewhere = False
    for providing_header in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(
        template, ()):
      if providing_header in include_state:
        supplied_elsewhere = True
        break
    if supplied_elsewhere:
      continue
    if required_header.strip('<>"') not in include_state:
      error(filename, required_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header + ' for ' + template)
2920
2921
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs every per-line check on one line of the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute
                 supplies the unprocessed text used for NOLINT parsing.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported.  It is handed through to
           each individual check unchanged.
  """
  # NOLINT markers are parsed first, from the raw (uncleansed) text.
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckPointerDeclarationWhitespace(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002953
2954
def ProcessFileData(filename, file_extension, lines, error):
  """Lints the contents of one file and reports findings through `error`.

  Args:
    filename: Name of the file being linted.
    file_extension: The file's extension, without the leading dot.
    lines: A list of strings, one per line of the file; the last element is
           empty if the file is terminated with a newline.
    error: Callable used to report problems. It is forwarded to every check
           and is invoked as
           error(filename, linenum, category, confidence, message).
  """
  # Surround the real content with sentinel lines so that list indices line
  # up with 1-based line numbers and the end of the file is recognizable.
  leading_marker = ['// marker so line numbers and indices both start at 1']
  trailing_marker = ['// marker so line numbers end in a known way']
  lines = leading_marker + lines + trailing_marker

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  ResetNolintSuppressions()

  # Whole-file checks that run before comments are removed.
  CheckForCopyright(filename, lines, error)
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)

  # Per-line checks.
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # Done here instead of in ProcessLine so that the check sees `lines`
  # rather than the cleansed lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
2993
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is "-"), strips trailing carriage
  returns, and runs ProcessFileData on the result. Files whose extension is
  not cc, h or cpp are skipped with a note on stderr. A file that cannot be
  opened is reported on stderr and skipped.

  Args:
    filename: The name of the file to parse, or "-" for stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  The input is not
    # opened with universal newline support (which codecs doesn't support
    # anyway), so after splitting on '\n' the lines of a CRLF file still end
    # in '\r'.  Those are removed below, and a warning is issued further
    # down when '\r\n' is not the platform's line separator.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      # Close the handle explicitly instead of relying on garbage
      # collection (try/finally keeps this valid on old interpreters).
      try:
        lines = source.read().split('\n')
      finally:
        source.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for linenum, text in enumerate(lines):
      if text.endswith('\r'):
        lines[linenum] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in ('cc', 'h', 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since only one error is output for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
3055
3056
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Never returns: with a message the process exits via
  sys.exit('\\nFATAL ERROR: ' + message); without one it exits with status 1.

  Args:
    message: Optional error text to show after the usage string; may be
             None or empty.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
3068
3069
def PrintCategories():
  """Lists every error category on stderr, one per line, then exits with 0.

  These are the category names accepted by the --filter option.
  """
  listing = [' %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
3077
3078
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format, verbosity level, filters and counting
  style as side-effects.  Invalid options, an invalid option value, or a
  missing file list end the program through PrintUsage; an empty --filter
  value prints the category list and exits through PrintCategories.

  Args:
    args: The command line arguments (without the program name).

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings so unspecified options are unchanged.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a non-numeric level as a usage error, like every other bad
      # option value, instead of letting ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity should be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
3129
3130
def main():
  """Command-line entry point: lints every file named on the command line.

  Exits through sys.exit with a true value when at least one error was
  counted and a false value otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Wrap stderr so that output containing non-ASCII characters is written
  # with replacement characters instead of killing the program.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in filenames:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


# Run the linter only when this file is executed as a script.
if __name__ == '__main__':
  main()