blob: 4972f9db5dd5020e6bd67dcb6842e522d3f2a710 [file] [log] [blame]
erg@google.com4e00b9a2009-01-12 23:05:11 +00001#!/usr/bin/python2.4
2#
erg@google.com969161c2009-06-26 22:06:46 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool. If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38# Suggestions
39# -----------
40# - Check for no 'explicit' for multi-arg ctor
41# - Check for boolean assign RHS in parens
42# - Check for ctor initializer-list colon position and spacing
43# - Check that if there's a ctor, there should be a dtor
44# - Check accessors that return non-pointer member variables are
45# declared const
46# - Check accessors that return non-const pointer member vars are
47# *not* declared const
48# - Check for using public includes for testing
49# - Check for spaces between brackets in one-line inline method
50# - Check for no assert()
51# - Check for spaces surrounding operators
52# - Check for 0 in pointer context (should be NULL)
53# - Check for 0 in char context (should be '\0')
54# - Check for camel-case method name conventions for methods
55# that are not simple inline getters and setters
56# - Check that base classes have virtual destructors
57# put " // namespace" after } that closes a namespace, with
58# namespace's name after 'namespace' if it is named.
59# - Do not indent namespace contents
60# - Avoid inlining non-trivial constructors in header files
61# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
62# - Check for old-school (void) cast for call-sites of functions
63# ignored return value
64# - Check gUnit usage of anonymous namespace
65# - Check for class declaration order (typedefs, consts, enums,
66# ctor(s?), dtor, friend declarations, methods, member vars)
67#
68
69"""Does google-lint on c++ files.
70
71The goal of this script is to identify places in the code that *may*
72be in non-compliance with google style. It does not attempt to fix
73up these problems -- the point is to educate. It does also not
74attempt to find all problems, or to ensure that everything it does
75find is legitimately a problem.
76
77In particular, we can get very confused by /* and // inside strings!
78We do a small hack, which is to ignore //'s with "'s after them on the
79same line, but it is far from perfect (in either direction).
80"""
81
82import codecs
83import getopt
84import math # for log
85import os
86import re
87import sre_compile
88import string
89import sys
90import unicodedata
91
92
93_USAGE = """
94Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.coma868d2d2009-10-09 21:18:45 +000095 [--counting=total|toplevel|detailed]
erg@google.com4e00b9a2009-01-12 23:05:11 +000096 <file> [file] ...
97
98 The style guidelines this tries to follow are those in
99 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
100
101 Every problem is given a confidence score from 1-5, with 5 meaning we are
102 certain of the problem, and 1 meaning it could be a legitimate construct.
103 This will miss some errors, and is not a substitute for a code review.
104
105 To prevent specific lines from being linted, add a '// NOLINT' comment to the
106 end of the line.
107
108 The files passed in will be linted; at least one file must be provided.
109 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
110
111 Flags:
112
113 output=vs7
114 By default, the output is formatted to ease emacs parsing. Visual Studio
115 compatible output (vs7) may also be used. Other formats are unsupported.
116
117 verbose=#
118 Specify a number 0-5 to restrict errors to certain verbosity levels.
119
120 filter=-x,+y,...
121 Specify a comma-separated list of category-filters to apply: only
122 error messages whose category names pass the filters will be printed.
123 (Category names are printed with the message and look like
124 "[whitespace/indent]".) Filters are evaluated left to right.
125 "-FOO" and "FOO" means "do not print categories that start with FOO".
126 "+FOO" means "do print categories that start with FOO".
127
128 Examples: --filter=-whitespace,+whitespace/braces
129 --filter=whitespace,runtime/printf,+runtime/printf_format
130 --filter=-,+build/include_what_you_use
131
132 To see a list of all the categories used in cpplint, pass no arg:
133 --filter=
erg@google.coma868d2d2009-10-09 21:18:45 +0000134
135 counting=total|toplevel|detailed
136 The total number of errors found is always printed. If
137 'toplevel' is provided, then the count of errors in each of
138 the top-level categories like 'build' and 'whitespace' will
139 also be printed. If 'detailed' is provided, then a count
140 is provided for each category like 'build/class'.
erg@google.com4e00b9a2009-01-12 23:05:11 +0000141"""
142
143# We categorize each error message we print. Here are the categories.
144# We want an explicit list so we can list them all in cpplint --filter=.
145# If you add a new error message with a new category, add it to the list
146# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.coma87abb82009-02-24 01:41:01 +0000147# \ used for clearer layout -- pylint: disable-msg=C6013
148_ERROR_CATEGORIES = '''\
erg@google.com4e00b9a2009-01-12 23:05:11 +0000149 build/class
150 build/deprecated
151 build/endif_comment
152 build/forward_decl
153 build/header_guard
154 build/include
erg@google.coma868d2d2009-10-09 21:18:45 +0000155 build/include_alpha
erg@google.com4e00b9a2009-01-12 23:05:11 +0000156 build/include_order
157 build/include_what_you_use
158 build/namespaces
159 build/printf_format
160 build/storage_class
161 legal/copyright
162 readability/braces
163 readability/casting
164 readability/check
165 readability/constructors
166 readability/fn_size
167 readability/function
168 readability/multiline_comment
169 readability/multiline_string
170 readability/streams
171 readability/todo
172 readability/utf8
173 runtime/arrays
174 runtime/casting
175 runtime/explicit
176 runtime/int
177 runtime/init
erg@google.com36649102009-03-25 21:18:36 +0000178 runtime/invalid_increment
erg@google.coma868d2d2009-10-09 21:18:45 +0000179 runtime/member_string_references
erg@google.com4e00b9a2009-01-12 23:05:11 +0000180 runtime/memset
erg@google.coma868d2d2009-10-09 21:18:45 +0000181 runtime/operator
erg@google.com4e00b9a2009-01-12 23:05:11 +0000182 runtime/printf
183 runtime/printf_format
184 runtime/references
185 runtime/rtti
186 runtime/sizeof
187 runtime/string
188 runtime/threadsafe_fn
189 runtime/virtual
190 whitespace/blank_line
191 whitespace/braces
192 whitespace/comma
193 whitespace/comments
194 whitespace/end_of_line
195 whitespace/ending_newline
196 whitespace/indent
197 whitespace/labels
198 whitespace/line_length
199 whitespace/newline
200 whitespace/operators
201 whitespace/parens
202 whitespace/semicolon
203 whitespace/tab
204 whitespace/todo
erg@google.coma87abb82009-02-24 01:41:01 +0000205'''
erg@google.com4e00b9a2009-01-12 23:05:11 +0000206
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags). All entries here should start with a '-' or '+', as in
# the --filter= flag.
_DEFAULT_FILTERS = [ '-build/include_alpha' ]
erg@google.come35f7652009-06-19 20:52:09 +0000212
# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
216
# Header names that this tool considers to be STL headers.
_STL_HEADERS = frozenset((
    # Extensionless header names.
    'algorithm', 'bitset', 'deque', 'exception', 'functional', 'hash_map',
    'hash_set', 'iterator', 'list', 'map', 'memory', 'pthread_alloc',
    'queue', 'set', 'sstream', 'stack', 'utility', 'vector',
    # Header names spelled with a ".h" suffix.
    'algobase.h', 'alloc.h', 'function.h', 'hash_map.h', 'hash_set.h',
    'list.h', 'pair.h', 'set.h', 'stl_alloc.h', 'stl_relops.h',
    'type_traits.h', 'vector.h',
    ))
226
227
# C++ system header names that are not counted as STL headers.
_CPP_HEADERS = frozenset((
    # <c...> headers.
    'cassert', 'cctype', 'cerrno', 'cfloat', 'ciso646', 'climits',
    'clocale', 'cmath', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    # Other extensionless header names.
    'complex', 'exception', 'fstream', 'iomanip', 'ios', 'iosfwd',
    'iostream', 'limits', 'numeric', 'rope', 'slist', 'stdexcept',
    'string', 'strstream', 'typeinfo', 'valarray',
    # Header names spelled with a ".h" suffix.
    'algo.h', 'builtinbuf.h', 'bvector.h', 'complex.h', 'defalloc.h',
    'deque.h', 'editbuf.h', 'fstream.h', 'hashtable.h', 'heap.h',
    'indstream.h', 'iomanip.h', 'iostream.h', 'istream.h', 'iterator.h',
    'map.h', 'multimap.h', 'multiset.h', 'ostream.h', 'parsestream.h',
    'pfstream.h', 'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope.h',
    'ropeimpl.h', 'SFile.h', 'slist.h', 'stack.h', 'stdiostream.h',
    'streambuf.h', 'stream.h', 'strfile.h', 'strstream.h', 'tempbuf.h',
    'tree.h',
    ))
244
245
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that each _M version is listed before its
# plain counterpart, which substring matching relies on.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> more specific macro name}.
_CHECK_REPLACEMENT = dict((m, {}) for m in _CHECK_MACROS)

# Macros that assert a condition holds: the operator maps directly to the
# comparison suffix (e.g. CHECK with '==' becomes CHECK_EQ).
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for macro, template in [('DCHECK', 'DCHECK_%s'),
                          ('CHECK', 'CHECK_%s'),
                          ('EXPECT_TRUE', 'EXPECT_%s'),
                          ('ASSERT_TRUE', 'ASSERT_%s'),
                          ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                          ('ASSERT_TRUE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % replacement

# Macros that assert a condition does NOT hold: the operator maps to the
# inverse comparison (e.g. EXPECT_FALSE with '==' becomes EXPECT_NE).
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for macro, template in [('EXPECT_FALSE', 'EXPECT_%s'),
                          ('ASSERT_FALSE', 'ASSERT_%s'),
                          ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                          ('ASSERT_FALSE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % inv_replacement
277
278
# Header type constants, for use with _IncludeState.CheckNextIncludeOrder().
# They are numbered consecutively starting at 1.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
286
287
# Compiled regular expressions, keyed by their pattern string. Shared by
# Match() and Search() so each distinct pattern is compiled only once.
_regexp_compile_cache = {}


def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: Regular expression source string.
    s: The string to match against; matching is anchored at its start.

  Returns:
    The match object, or None if the pattern does not match at the start
    of s.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  # re.compile is the public API; sre_compile is an internal module that is
  # deprecated in newer Python releases.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)


def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: Regular expression source string.
    s: The string to scan for the first location where the pattern matches.

  Returns:
    The match object, or None if the pattern matches nowhere in s.
  """
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
306
307
class _IncludeState(dict):
  """Tracks line numbers for includes, and the order in which includes appear.

  As a dict, an _IncludeState object serves as a mapping between include
  filename and line number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, passing
  in one of the header type constants. It returns an empty string when the
  header is acceptably placed and a descriptive error message otherwise.
  """
  # self._section only ever advances through these values; an include that
  # would need to move it backwards is reported by CheckNextIncludeOrder().
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used when building error messages.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    # The section the most recently accepted include belongs to.
    self._section = self._INITIAL_SECTION
    # Canonicalized path of the last header accepted by
    # IsInAlphabeticalOrder(); reset to '' whenever the section changes.
    self._last_header = ''

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - replaces "-" with "_" so they both cmp the same.
    - removes '-inl' since we don't require them to be after the main header.
    - lowercase everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    underscored = without_inl.replace('-', '_')
    return underscored.lower()

  def IsInAlphabeticalOrder(self, header_path):
    """Check if a header is in alphabetical order with the previous header.

    The remembered "previous header" is only advanced when the new header
    is in order.

    Args:
      header_path: Header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    candidate = self.CanonicalizeAlphabeticalOrder(header_path)
    in_order = self._last_header <= candidate
    if in_order:
      self._last_header = candidate
    return in_order

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.
    """
    previous_section = self._section
    # Built up front, against the section we were in before this include.
    out_of_order = ('Found %s after %s' %
                    (self._TYPE_NAMES[header_type],
                     self._SECTION_NAMES[previous_section]))

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        target_section = self._C_SECTION
      else:
        target_section = self._CPP_SECTION
      if previous_section > target_section:
        # System header after a later section: report it and restart the
        # alphabetical tracking.
        self._last_header = ''
        return out_of_order
      self._section = target_section
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # A (possible) own header counts as such only at the very top;
      # afterwards it is treated like any other header.
      if previous_section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    if self._section != previous_section:
      self._last_header = ''

    return ''
431
432
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    new_filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        new_filters.append(clean_filt)
    # Validate before committing, so a rejected filter string never leaves
    # an invalid entry behind in self.filters.
    for filt in new_filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    self.filters = new_filters

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: The category of the error being counted. Per-category
                counts are kept only for the 'toplevel' and 'detailed'
                counting styles; 'toplevel' keeps just the part of the
                category before the first '/'.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # items() rather than iteritems(): the latter exists only on Python 2.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000509
# The single module-level state object that the helper functions in this
# module read and update.
_cpplint_state = _CppLintState()
511
512
def _OutputFormat():
  """Returns the output format currently stored in the module state."""
  state = _cpplint_state
  return state.output_format
516
517
def _SetOutputFormat(output_format):
  """Stores a new output format in the module state.

  Args:
    output_format: The format name to store.
  """
  state = _cpplint_state
  state.SetOutputFormat(output_format)
521
522
def _VerboseLevel():
  """Returns the verbosity level currently stored in the module state."""
  state = _cpplint_state
  return state.verbose_level
526
527
def _SetVerboseLevel(level):
  """Stores a new verbosity level in the module state.

  Args:
    level: The verbosity level to store.

  Returns:
    The verbosity level that was in effect before this call.
  """
  previous_level = _cpplint_state.SetVerboseLevel(level)
  return previous_level
531
532
def _SetCountingStyle(level):
  """Stores a new error-counting style in the module state.

  Args:
    level: The counting style to store (despite the parameter name, this is
           the counting style, not a verbosity level).
  """
  state = _cpplint_state
  state.SetCountingStyle(level)
536
537
def _Filters():
  """Returns the list of output filters stored in the module state."""
  state = _cpplint_state
  return state.filters
541
542
def _SetFilters(filters):
  """Replaces the error-message filters stored in the module state.

  The filters decide whether a given error message is emitted.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  state = _cpplint_state
  state.SetFilters(filters)
554
555
class _FunctionState(object):
  """Remembers the function being scanned and counts the lines in its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Start analyzing function body.

    Resets the line count and marks that a function is being tracked.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Count line in current function body.

    Does nothing unless a function is currently being tracked.
    """
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # Functions whose name starts with TEST or Test get the larger budget.
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    else:
      base_trigger = self._NORMAL_TRIGGER
    # The allowed size doubles with each verbosity level.
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The level is the base-2 log of how many times the body exceeds
    # base_trigger, truncated to an int and capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).' % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False
610
611
class _IncludeError(Exception):
  """Exception type for problems with the order of includes in a file."""
615
616
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path with backslashes replaced by '/'.

    This makes Windows paths look like Unix paths.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root (SVN, git or hg), or
      the unmodified FullName() when the file does not exist or no checkout
      root is found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        while os.path.exists(os.path.join(one_up_dir, ".svn")):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        # + 1 also drops the '/' that follows the checkout root.
        return fullname[len(prefix) + 1:]

      # Not SVN? Try to find a git or hg top level directory by searching up
      # from the current path.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not os.path.exists(os.path.join(root_dir, ".git")) and
             not os.path.exists(os.path.join(root_dir, ".hg"))):
        root_dir = os.path.dirname(root_dir)
      # The loop also stops at the filesystem root, so re-check that a
      # repository marker was actually found.
      if (os.path.exists(os.path.join(root_dir, ".git")) or
          os.path.exists(os.path.join(root_dir, ".hg"))):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
702
703
def _ShouldPrintError(category, confidence):
  """Returns true iff confidence >= verbose, and category passes filter.

  Filters are applied in order, so a later matching filter overrides an
  earlier one.
  """
  # First gate: the confidence must reach the configured verbosity level.
  if confidence < _cpplint_state.verbose_level:
    return False

  # Second gate: replay the filters left to right. A matching '-' filter
  # suppresses the category and a matching '+' filter re-enables it.
  suppressed = False
  for one_filter in _Filters():
    matches = category.startswith(one_filter[1:])
    if one_filter.startswith('-'):
      suppressed = suppressed or matches
    elif one_filter.startswith('+'):
      suppressed = suppressed and not matches
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
725
726
def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  We log where the error was found, and also our confidence in the error,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  Errors rejected by _ShouldPrintError() are neither counted nor printed.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
              falls under: "whitespace", say, or "runtime". Categories
              may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
                the error, with 5 meaning that we are certain of the problem,
                and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  # There are two ways we might decide not to print an error message:
  # the verbosity level isn't high enough, or the filters filter it out.
  if not _ShouldPrintError(category, confidence):
    return

  _cpplint_state.IncrementErrorCount(category)
  # NOTE(review): this listing collapses repeated whitespace; confirm the
  # exact spacing inside these format strings against the original file.
  if _cpplint_state.output_format == 'vs7':
    line_format = '%s(%s): %s [%s] [%d]\n'
  else:
    line_format = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(line_format % (
      filename, linenum, message, category, confidence))
755
756
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style (/* ... */) comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of space removals tools so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
# The pattern is compiled with re.VERBOSE, so the whitespace and newlines
# inside it are layout only and are not part of what is matched.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
777
778
def IsCppString(line):
  """Tells whether text appended after 'line' would sit inside a string.

  Neither single-line nor multi-line comments are taken into account.

  Args:
    line: A partial line of code, i.e. characters 0..n of a full line.

  Returns:
    True if 'line' leaves a string constant open, so that the next
    character appended to it would be part of that string.
  """
  # Neutralize escaped backslashes first so that \\" is not read as \".
  sanitized = line.replace(r'\\', 'XX')
  all_quotes = sanitized.count('"')
  escaped_quotes = sanitized.count(r'\"')
  char_literal_quotes = sanitized.count("'\"'")
  # An odd number of "real" quotes means a string is still open.
  unmatched = all_quotes - escaped_quotes - char_literal_quotes
  return unmatched % 2 == 1
794
795
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  Args:
    lines: A list of strings, one per source line.
    lineix: The index at which to start looking.

  Returns:
    The index of the first line at or after lineix that starts with '/*'
    and does not also close the comment, or len(lines) if there is none.
  """
  total = len(lines)
  while lineix < total:
    stripped = lines[lineix].strip()
    # A comment that also closes on this line is not a multi-line comment.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return lineix
    lineix += 1
  return total
805
806
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the line that closes the current comment.

  Args:
    lines: A list of strings, one per source line.
    lineix: The index of a line known to be inside a comment.

  Returns:
    The index of the first line at or after lineix whose stripped text ends
    with '*/', or len(lines) if no such line exists.
  """
  for candidate in range(lineix, len(lines)):
    if lines[candidate].strip().endswith('*/'):
      return candidate
  return len(lines)
814
815
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with placeholder comments.

  Args:
    lines: A list of strings, modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so later checks do not
  # emit unnecessary blank line warnings for the cleared region.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
822
823
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file, used when reporting errors.
    lines: A list of strings, one per source line; modified in place.
    error: The function to call if a comment is never closed.
  """
  cursor = 0
  while cursor < len(lines):
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= len(lines):
      # No further multi-line comment in the file.
      return
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    # Resume scanning right after the comment that was just cleared.
    cursor = last + 1
    RemoveMultiLineCommentsFromRange(lines, first, cursor)
838
839
def CleanseComments(line):
  """Strips //-comments and single-line C-style /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_index = line.find('//')
  if slash_index >= 0:
    # A '//' that sits inside a string constant is not a comment.
    if not IsCppString(line[:slash_index]):
      line = line[:slash_index]
  # Finally drop any /* ... */ that opens and closes on this line.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
854
855
class CleansedLines(object):
  """Keeps three parallel views of a file's lines.

  1) elided: lines with strings collapsed and comments removed,
  2) lines: lines with comments removed, and
  3) raw_lines: the lines exactly as given.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Collapse strings before stripping comments so that text such as
      # "http://" inside a string is not mistaken for a comment.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' pairs.

    Strings are nixed so that later checks are not fooled by text like
    '"http://"'. Lines matched by _RE_PATTERN_INCLUDE are returned
    untouched.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Drop escaped characters first so that collapsing quotes stays simple.
    # Things that look like escapes shouldn't occur outside of strings and
    # chars.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
899
900
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression. Nesting is
  tracked character by character starting at pos, so brackets that appear
  earlier on the starting line do not influence the result, and the closing
  character searched for always matches the kind of bracket that was opened.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close (or if the character
    at pos is not an opening bracket). Note we ignore strings and comments
    when matching; and the line we return is the 'cleansed' line at linenum
    (the last line examined when no close is found).
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  num_lines = clean_lines.NumLines()
  depth = 0
  start = pos  # On the first line, only look at text from pos onwards.
  while True:
    for index in range(start, len(line)):
      char = line[index]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          # This is the bracket that balances the one at the start position.
          return (line, linenum, index + 1)
    linenum += 1
    if linenum >= num_lines:
      # Ran off the end of the file without balancing the expression.
      return (line, num_lines, -1)
    line = clean_lines.elided[linenum]
    start = 0
938
939
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: A list of strings; index 0 is a dummy entry.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Index 0 is skipped because
  # there's a dummy line at the front.
  for candidate in lines[1:11]:
    if re.search(r'Copyright', candidate, re.I):
      return
  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
951
952
def GetHeaderGuardCPPVariable(filename):
  """Builds the CPP variable that should guard the given header.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.
  """
  repository_path = FileInfo(filename).RepositoryName()
  # Dashes, dots, slashes and whitespace all become underscores.
  guard_stem = re.sub(r'[-./\s]', '_', repository_path)
  return guard_stem.upper() + '_'
967
968
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef header guard is present. For other
  headers, checks that the full pathname is used.

  Three things are reported, all under the 'build/header_guard' category:
  a missing or mismatched #ifndef/#define pair, an #ifndef variable that
  differs from the expected one, and a final #endif line that does not
  carry the expected trailing comment.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The guard name this file is expected to use.
  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    linesplit = line.split()
    if len(linesplit) >= 2:
      # find the first occurrence of #ifndef and #define, save arg
      if not ifndef and linesplit[0] == '#ifndef':
        # set ifndef to the header guard presented on the #ifndef line.
        ifndef = linesplit[1]
        ifndef_linenum = linenum
      if not define and linesplit[0] == '#define':
        define = linesplit[1]
    # find the last occurrence of #endif, save entire line
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  # Without a matching #ifndef/#define pair there is no guard at all; the
  # remaining style checks would be meaningless, so stop here.
  if not ifndef or not define or ifndef != define:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility.
  if ifndef != cppvar and not Search(r'\bNOLINT\b', lines[ifndef_linenum]):
    # The backward-compatible spelling is still reported, but at level 0.
    error_level = 0
    if ifndef != cppvar + '_':
      error_level = 5

    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  # The last #endif must repeat the guard name in a trailing comment; the
  # same level-0 leniency applies to the double-underscore spelling.
  if (endif != ('#endif // %s' % cppvar) and
      not Search(r'\bNOLINT\b', lines[endif_linenum])):
    error_level = 0
    if endif != ('#endif // %s' % (cppvar + '_')):
      error_level = 5

    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1027
1028
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  Such characters indicate that either the file contained invalid UTF-8
  (likely) or Unicode replacement characters (which it shouldn't). Note
  that it's possible for this to throw off line numbering if the invalid
  UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  flagged = (index for index, text in enumerate(lines) if u'\ufffd' in text)
  for index in flagged:
    error(filename, index, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1045 'Line contains invalid UTF-8 (or Unicode replacement character).')
1046
1047
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file that does not end with a newline character.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1063 'Could not find a newline character at the end of the file.')
1064
1065
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments or "..." strings that run past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other. Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are OK, and their second slash could
  # erroneously trigger the \" detection below, so drop them up front.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2 != 0:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1101 'ugly and unnecessary, and you should use concatenation instead".')
1102
1103
# Pairs of (thread-unsafe call prefix, thread-safe replacement prefix).
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of a function name on the line is examined, so a
  look-alike such as 'my_rand(' earlier on the line does not hide a real
  'rand(' call later on. At most one error is reported per function per
  line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # The name only counts as a call to the posix function when it is not
      # the tail of a longer identifier or a member access (., ->).
      # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break  # One report per function per line is enough.
      ix = line.find(single_thread_function, ix + 1)
1146
1147
# Matches a statement of the form *count++; or *count--; which moves the
# pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Flags statements like *count++ that step the pointer, not the value.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1174
1175
class _ClassInfo(object):
  """Record of what has been learned so far about one class declaration."""

  def __init__(self, name, linenum):
    # Identity of the class: its name and the line it was declared on.
    self.name = name
    self.linenum = linenum
    # Brace tracking for the class body.
    self.brace_depth = 0
    self.seen_open_brace = False
    # Facts used for the virtual destructor check.
    self.is_derived = False
    self.has_virtual_destructor = False
    self.virtual_method_linenumber = None
1187
1188
class _ClassState(object):
  """Tracks which class declarations the parser believes it is inside.

  The state is a stack of _ClassInfos representing the parser's guess
  as to the current nesting of class declarations. The innermost class
  is at the top (back) of the stack. Typically, the stack will either
  be empty or have exactly one entry.
  """

  def __init__(self):
    self.classinfo_stack = []

  def CheckFinished(self, filename, error):
    """Reports a class whose declaration was never seen to end.

    Call this when all lines in a file have been processed.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    if not self.classinfo_stack:
      return
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    outermost = self.classinfo_stack[0]
    error(filename, outermost.linenum, 'build/class', 5,
          'Failed to find complete declaration of class %s' % outermost.name)
1215 self.classinfo_stack[0].name)
1216
1217
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++. Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  This function is called once per line; class_state carries the class
  nesting information from one call to the next.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, error level, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  # A type keyword followed by a storage class means the order is reversed.
  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  classinfo_stack = class_state.classinfo_stack
  # Look for a class declaration
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if class_decl_match:
    # Group 3 is the (possibly qualified) class name.
    classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # If the opening brace hasn't been seen look for it and also
  # parent class declarations.
  if not classinfo.seen_open_brace:
    # If the line has a ';' in it, assume it's a forward declaration or
    # a single-line class declaration, which we won't process.
    if line.find(';') != -1:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = (line.find('{') != -1)
    # Look for a bare ':'
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  args = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
               % re.escape(base_classname),
               line)
  # A lone 'void' parameter and a parameter that is a (const) reference to
  # the class itself are exempt from the warning.
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  brace_depth = classinfo.brace_depth
  brace_depth = brace_depth + line.count('{') - line.count('}')
  if brace_depth <= 0:
    # Braces are balanced again, so this class is finished; take it off the
    # stack and run the end-of-class check on it.
    classinfo = classinfo_stack.pop()
    # Try to detect missing virtual destructor declarations.
    # For now, only warn if a non-derived class with virtual methods lacks
    # a virtual destructor. This is to make it less likely that people will
    # declare derived virtual destructors without declaring the base
    # destructor virtual.
    if ((classinfo.virtual_method_linenumber is not None) and
        (not classinfo.has_virtual_destructor) and
        (not classinfo.is_derived)):  # Only warn for base classes
      error(filename, classinfo.linenum, 'runtime/virtual', 4,
            'The class %s probably needs a virtual destructor due to '
            'having virtual method(s), one declared at line %d.'
            % (classinfo.name, classinfo.virtual_method_linenumber))
  else:
    # Still inside the class body; remember the depth for the next line.
    classinfo.brace_depth = brace_depth
1380 classinfo.brace_depth = brace_depth
1381
1382
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  All findings are reported under the 'whitespace/parens' category.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Since function calls often occur inside if/for/while/switch
  # expressions - which have their own, more liberal conventions - we
  # first see if we should be looking inside such an expression for a
  # function call, to which we can apply more strict standards.
  fncall = line  # if there's no control flow construct, look at whole line
  for pattern in (r'\bif\s*\((.*)\)\s*{',
                  r'\bfor\s*\((.*)\)\s*{',
                  r'\bwhile\s*\((.*)\)\s*[{;]',
                  r'\bswitch\s*\((.*)\)\s*{'):
    match = Search(pattern, line)
    if match:
      fncall = match.group(1)  # look inside the parens for function calls
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )"). We make an exception
  # for nested parens ( (a+b) + c ). Likewise, there should never be
  # a space before a ( when it's a function argument. I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.
  if (  # Ignore control structures.
      not Search(r'\b(if|for|while|switch|return|delete)\b', fncall) and
      # Ignore pointers/references to functions.
      not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
      # Ignore pointers/references to arrays.
      not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
    # The negative lookaheads skip a '(' that is only followed by a
    # backslash line continuation.
    if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
      error(filename, linenum, 'whitespace/parens', 4,
            'Extra space after ( in function call')
    elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
      error(filename, linenum, 'whitespace/parens', 2,
            'Extra space after (')
    # Lines containing #define or typedef are exempt from the
    # space-before-paren check.
    if (Search(r'\w\s+\(', fncall) and
        not Search(r'#\s*define|typedef', fncall)):
      error(filename, linenum, 'whitespace/parens', 4,
            'Extra space before ( in function call')
    # If the ) is followed only by a newline or a { + newline, assume it's
    # part of a control statement (if/while/etc), and don't complain
    if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
      error(filename, linenum, 'whitespace/parens', 2,
            'Extra space before )')
1440 'Extra space before )')
1441
1442
def IsBlankLine(line):
  """Tells whether the given line is blank.

  A line counts as blank when it is empty or consists of only white spaces.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1455 return not line or line.isspace()
1456
1457
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles one line: it either begins a function on
  function_state, ends (and checks) the current one, or counts the line
  towards the current body.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  raw = clean_lines.raw_lines
  raw_line = raw[linenum]
  joined_line = ''

  starting_func = False
  # Match() anchors at the start of the line, which is why only unindented
  # declarations are considered.
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the declaration for the first line that settles
    # whether this is a declaration/trivial function or a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break  # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):  # Handle TEST... macros
          # Use the macro's arguments, gathered from the joined lines, as
          # part of the reported name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:  # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    # The NOLINT marker is looked up on the raw line, since comments have
    # been stripped from 'line'.
    if not Search(r'\bNOLINT\b', raw_line):
      function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1526 function_state.Count() # Count non-blank/non-comment lines.
1527
1528
# Matches a comment that begins with TODO. The groups capture the whitespace
# before TODO, the optional parenthesized username, and the character (if
# any) that follows the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Looks for common mistakes in TODO comments.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if todo is None:
    return

  spaces_before, owner, spaces_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # The last group is None when something other than whitespace follows,
  # which must be reported; hence an explicit membership test rather than a
  # truthiness check.
  if spaces_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1559 'TODO(my_username) should be followed by a space')
1560
1561
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't have too many
  blank lines in a row.

  The blank-line and comment checks look at the raw line; every later check
  looks at the elided line (comments and strings removed).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  if IsBlankLine(line):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are
      # indented 4 spaces (because they did not fit in a 80 column line when
      # placed on the same line as the function name).  We also check for the
      # case where the previous line is indented 6 spaces, which may happen
      # when the initializers of a constructor do not fit into a 80 column
      # line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The 5-character prefix must be exactly four spaces and the colon;
        # a shorter literal could never equal the slice of an indented line.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have
        # a closing paren, without the opening paren, followed by an opening
        # brace or colon (for initializer lists) we assume that it is the last
        # line of a function header.  If we have a colon indented 4 spaces, it
        # is an initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block. Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block. Is this needed?')

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it.  An even number of
    # unescaped double quotes before the '//' means no string is open there.
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement is
  # a raw string so that the backslash is spelled deliberately instead of
  # relying on Python keeping an unrecognized escape sequence.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  # Groups: 1 = keyword, 2 = spaces after '(', 3 = first character of the
  # condition, 4 = spaces before ')'.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, you should have spaces before your braces.
  # And since you should never have braces at the beginning of a line, this is
  # an easy test.
  if Search(r'[^ (]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1789
1790
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple describing the nearest preceding line
    that IsBlankLine rejects.  When every earlier line is blank, or there is
    no earlier line at all, the result is ('', -1).
  """
  index = linenum
  # Walk upwards one line at a time, stopping once line 0 has been examined.
  while index > 0:
    index -= 1
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  return ('', -1)
1812
1813
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Checks, in order: an opening brace alone on its line, an `else` that is
  separated from its preceding `}`, an `else` braced on only one side, code
  sharing a line with `else` or `do`, and a redundant `;` after a `}`.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        # Jump to the end of the parenthesized condition, which may be on a
        # later line, and look for the brace after it.
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  # To see more context, earlier non-blank lines are prepended for as long as
  # the accumulated text still looks like an indented '{...};' and the line
  # being prepended contains no ';'.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # Stop at the start of the file: there GetPreviousNonBlankLine keeps
    # returning ('', -1), which would leave `line` unchanged and make this
    # loop spin forever.
    if (prevlinenum >= 0 and Match(r'\s+{.*}\s*;', line)
        and not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1886
1887
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a comparison-specific macro.

  For instance CHECK(a == 5) can be written as CHECK_EQ(a, 5); the same
  applies to CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """

  # Decimal and hex integers, strings, and chars (in that order).
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only comparisons against something that looks like a literal are
  # considered, since CHECK(x == iterator) won't compile as CHECK_EQ.  This
  # misses some legitimate suggestions but avoids extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  found = Match(pattern, line)
  if not found:
    return found

  # CHECK(x == NULL) is left alone because CHECK_EQ(x, NULL) won't compile
  # (requires a cast), and so are compound conditions using && or ||, such
  # as CHECK(a == b || c == d).
  return not Search(r'NULL|&&|\|\|', line)
1921
1922
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific variants of CHECK and EXPECT macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Find the first known macro mentioned on the raw line; it selects the set
  # of replacements to suggest.
  raw_line = clean_lines.raw_lines[linenum]
  found_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_line:
      found_macro = candidate
      break
  if not found_macro:
    # Nothing to do for lines that mention none of the macros.
    return

  code = clean_lines.elided[linenum]  # comments and strings removed

  # Two-character operators are tried before their one-character prefixes,
  # and at most one suggestion is made per line.
  for comparison in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(comparison, found_macro, code):
      continue
    suggestion = _CHECK_REPLACEMENT[found_macro][comparison]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              suggestion, found_macro, comparison))
    break
1954
1955
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For non-Unicode (byte)
    strings this is simply the length of the string.
  """
  # Python 2 spells its Unicode text type `unicode`; Python 3 removed that
  # name because every `str` is already Unicode text.  Resolve the type here
  # so the function does not raise NameError on Python 3.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if isinstance(line, text_type):
    width = 0
    # Normalize first so that a base character plus combining mark that has a
    # precomposed form is counted as a single character.
    for c in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(c) in ('W', 'F'):
        # Wide and fullwidth characters occupy two columns.
        width += 2
      elif not unicodedata.combining(c):
        # Remaining combining marks occupy no column of their own.
        width += 1
    return width
  else:
    return len(line)
1976
1977
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Applies the line-level checks from the 'C++ style rules' section.

  Naming and comment style are mostly out of reach, so this covers what can
  be tested on one line: tabs, trailing whitespace, odd indentation,
  unindented labels, line length and several statements on one line.  It
  finishes by running the brace, spacing and CHECK-macro checks.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  indent_width = len(line) - len(line.lstrip(' '))  # leading spaces only

  # At most one of the following three complaints is raised per line.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  elif (indent_width in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    # A lone label is the one place where a single space of indent is fine.
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  elif (not indent_width and line[:2] != '//' and
        Search(r'[^:]:\s*$', line)):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor, '
          'the colon should be on the line after the definition header.')

  # Work out whether this line is part of the header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    for directive in ('#ifndef %s', '#define %s', '#endif // %s'):
      if line.startswith(directive % cppvar):
        is_header_guard = True
        break

  # #include lines and header guards can be long, since there's no clean way
  # to split them.  Comment lines ending in a URL are exempt as well: they
  # could be split, but that makes them harder to cut&paste.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    # for loops are allowed two ;'s (and may run over two lines), so a
    # previous line that mentions 'for' but holds no ';' gets a pass.
    prev_code = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = 'for' in prev_code and ';' not in prev_code
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
2072
2073
# Matches a quoted #include of a .h file whose path contains no '/', that
# is, one named without its directory (e.g. '#include "bar.h"').
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2082
2083
2084def _DropCommonSuffixes(filename):
2085 """Drops common suffixes like _test.cc or -inl.h from filename.
2086
2087 For example:
2088 >>> _DropCommonSuffixes('foo/foo-inl.h')
2089 'foo/foo'
2090 >>> _DropCommonSuffixes('foo/bar/foo.cc')
2091 'foo/bar/foo'
2092 >>> _DropCommonSuffixes('foo/foo_internal.h')
2093 'foo/foo'
2094 >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
2095 'foo/foo_unusualinternal'
2096
2097 Args:
2098 filename: The input filename.
2099
2100 Returns:
2101 The filename with the common suffix removed.
2102 """
2103 for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
2104 'inl.h', 'impl.h', 'internal.h'):
2105 if (filename.endswith(suffix) and len(filename) > len(suffix) and
2106 filename[-len(suffix) - 1] in ('-', '_')):
2107 return filename[:-len(suffix) - 1]
2108 return os.path.splitext(filename)[0]
2109
2110
2111def _IsTestFilename(filename):
2112 """Determines if the given filename has a suffix that identifies it as a test.
2113
2114 Args:
2115 filename: The input filename.
2116
2117 Returns:
2118 True if 'filename' looks like a test, False otherwise.
2119 """
2120 if (filename.endswith('_test.cc') or
2121 filename.endswith('_unittest.cc') or
2122 filename.endswith('_regtest.cc')):
2123 return True
2124 else:
2125 return False
2126
2127
def _ClassifyInclude(fileinfo, include, is_system):
  """Works out which category of header 'include' belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  if is_system:
    # Angle-bracket includes are C++ system headers when they are listed in
    # either known-header table, and C system headers otherwise.
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include share a basename once common suffixes
  # are dropped, and the include lives in the target's directory (or in the
  # sibling 'public' directory), the target most likely owns it.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # If only the first basename component is shared, the target might be
  # implementing the include, so the include is allowed to come first, but
  # nothing complains when it does not.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if target_head and include_head:
    if target_head.group(0) == include_head.group(0):
      return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2186
2187
erg@google.coma87abb82009-02-24 01:41:01 +00002188
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Runs the checks that only make sense on #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # The include pattern is anchored at the start of the line, so this one
  # match serves both the bookkeeping below and the stream check after it.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return

  delimiter, include = include_match.group(1, 2)
  is_system = (delimiter == '<')

  # We shouldn't include a file more than once.  Actually, there are a
  # handful of instances where doing so is okay, but in general it's not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # Each include is classified as one of those 5 types.  The
    # include_state object keeps track of the highest type seen, and
    # complains if we see a lower type after that.
    header_kind = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(header_kind)
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    if not include_state.IsInAlphabeticalOrder(include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
2256
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  An #include line is handed to CheckIncludeLine and receives no further
  checks from this function.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after.
  # Don't check the implementation on the same line.
  fnline = line.split('{', 1)[0]
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line)):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style. '
            'Use static_cast<%s>(...) instead' %
            match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases.  Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>. If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast. Google doesn't support "
          'RTTI.')

  # Catches "foo_(foo_)": a trailing-underscore name initialized from itself.
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match:
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # The warning is suppressed only when the whole second argument is a
  # literal ('', a decimal integer or a hex integer).  The alternatives are
  # grouped so that ^ and $ apply to every one of them; otherwise anything
  # merely starting with a digit (e.g. "10 * sizeof(x)") would be exempted.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^(''|-?[0-9]+|0[xX][0-9A-Fa-f]+)$",
                         match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  '>>' must be a delimiter in its own right so that
    # sizes such as "kSize >> 1" are recognized as constant.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2522
2523
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports a C-style cast found by applying pattern to the line.

  Because the text looks so similar, this also reports sizeof(type) and
  unnamed function parameters, each under its own category, instead of
  reporting them as casts.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group is the type being cast to.
    error: The function to call with any errors found.
  """
  cast_match = Search(pattern, line)
  if not cast_match:
    return

  # e.g., sizeof(int): look at the text that precedes the opening paren.
  text_before_paren = line[:cast_match.start(1) - 1]
  if Match(r'.*sizeof\s*$', text_before_paren):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  text_after_cast = line[cast_match.end(0):]

  # What follows the parenthesized type tells us whether this is really a
  # function declaration with a single unnamed parameter:
  #   ')'                    function pointer passed as an argument,
  #                          eg, void foo(void (*bar)(int));
  #   ';' '{' or 'throw()'   plain declaration/definition or function pointer
  #   (optionally const)     typedef, eg, void foo(int); or void foo(int) const;
  #   '='                    function pointer assignment,
  #                          eg, void *(*foo)(int) = ...
  #
  # Right now, this will only catch cases where there's a single argument, and
  # it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))',
                      text_after_cast)
  if declaration:
    terminator = declaration.group(3)
    has_block_comment = '/*' in raw_line
    if not terminator or terminator == ';' or not has_block_comment:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast_match.group(1)))
2577
2578
# Maps each STL header to the template names it provides.  Kept as a tuple of
# (header, templates) pairs so that iteration order is fixed.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Templates whose header need not be requested when any of the listed headers
# is already included.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples.
# Each pattern matches max<type>(..., ...), max(..., ...), but not foo->max,
# foo.max or type::max().
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# List of (compiled pattern, 'name<>', header) triples, one for every
# template listed in _HEADERS_CONTAINING_TEMPLATES, in table order.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
2640
2641
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a header belong to the same module.

  'Module' here means the following:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same module if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module.

  When filename_cc carries a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h', the leading
  part of the .cc path that the header path lacks is returned as well.  The
  caller uses it to open the header more robustly; the real include paths are
  not available in this context, so this guesswork is needed.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation.  Because of this, this function gives
  some false positives.  This should be sufficiently rare in practice.

  Args:
    filename_cc: the path of the .cc file.
    filename_h: the path of the header file.

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  # Anything that is not a .cc/.h pair is never the same module.
  if not filename_cc.endswith('.cc'):
    return (False, '')
  if not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the source name to its module stem: drop the extension and at most
  # one test suffix.
  source_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break

  # Same for the header: drop the extension and an optional '-inl'.
  header_stem = filename_h[:-len('.h')]
  if header_stem.endswith('-inl'):
    header_stem = header_stem[:-len('-inl')]

  # public/ and internal/ directories do not separate modules.
  for visibility_dir in ('/public/', '/internal/'):
    source_stem = source_stem.replace(visibility_dir, '/')
    header_stem = header_stem.replace(visibility_dir, '/')

  if not source_stem.endswith(header_stem):
    return (False, '')
  return (True, source_stem[:-len(header_stem)])
2695
2696
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.
      Only its open(filename, mode, encoding, errors) is called.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    linenum = 0
    for line in headerfile:
      linenum += 1
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle even if scanning fails.  An injected io factory
    # may hand back a plain iterable of lines with no close(), so only call
    # it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
2724
2725
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports missing STL includes.

  Emits a warning for every STL header whose containers or functions are used
  in the file while the header itself is not included.  Only one reason is
  given per header.  For example, if you use both equal_to<> and less<> in a
  .h file, only one (the latter in the file) of these will be reported as a
  reason to include <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Maps header name to (line number, template entity) of its latest use.
  # Example of required: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    code = clean_lines.elided[linenum]
    # Blank lines and preprocessor lines cannot use an STL entity.
    if not code or code[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    if _RE_PATTERN_STRING.search(code):
      required['<string>'] = (linenum, 'string')

    for algorithm_re, algorithm_name, algorithm_header in (
        _re_pattern_algorithm_header):
      if algorithm_re.search(code):
        required[algorithm_header] = (linenum, algorithm_name)

    # Template patterns all need a '<'; only lines containing one are scanned.
    # This is just a speed up, no semantics are changed.
    if '<' in code:
      for template_re, template_name, template_header in _re_pattern_templates:
        if template_re.search(code):
          required[template_header] = (linenum, template_name)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc.  Work on a copy of include_state so that the
  # additions made below stay local to this function.
  include_state = include_state.copy()

  # Use the absolute path so that matching works properly.
  abs_filename = os.path.abspath(filename)

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'.  In that case,
  # restore original file name here so that the corresponding header file can
  # be found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  flymake_suffix = '_flymake.cc'
  if abs_filename.endswith(flymake_suffix):
    abs_filename = abs_filename[:-len(flymake_suffix)] + '.cc'

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # include_state is modified during iteration, so we iterate over a copy of
  # the keys.
  for included_header in list(include_state.keys()):
    (same_module, path_prefix) = FilesBelongToSameModule(abs_filename,
                                                         included_header)
    header_path = path_prefix + included_header
    if same_module and UpdateIncludeState(header_path, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look.  In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header in required:
    (use_linenum, template) = required[required_header]

    # Some templates are considered available through other headers.
    provided_elsewhere = False
    for substitute in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(template, ()):
      if substitute in include_state:
        provided_elsewhere = True
        break
    if provided_elsewhere:
      continue

    if required_header.strip('<>"') not in include_state:
      error(filename, use_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header + ' for ' + template)
2817
2818
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs the per-line checks on one line of the file.

  The function-length check always runs.  Every other check is skipped when
  the raw line contains the word NOLINT.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute holds
                 the lines with comments still present.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported.
  """
  unfiltered_lines = clean_lines.raw_lines
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)

  # A NOLINT marker on the raw line suppresses all remaining checks.
  if Search(r'\bNOLINT\b', unfiltered_lines[line]):
    return

  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002850
2851
def ProcessFileData(filename, file_extension, lines, error):
  """Runs every lint check on the file contents and reports what is found.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
           The caller's list is not modified.
    error: A callable to which errors are reported.
  """
  # Surround the file with marker comments: the first makes line numbers and
  # list indices agree, the second gives the file a known last line.
  first_marker = ['// marker so line numbers and indices both start at 1']
  last_marker = ['// marker so line numbers end in a known way']
  lines = first_marker + lines + last_marker

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  # Whole-file checks that need the untouched lines.
  CheckForCopyright(filename, lines, error)
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  # Line-by-line checks on the cleansed lines.
  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
2888
2889
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Files that cannot be opened are skipped with a message on stderr.  Files
  whose extension is not cc, h or cpp are ignored, except for stdin ("-"),
  which is always processed.

  Args:
    filename: The name of the file to parse, or "-" to read from stdin.
    vlevel: The level of errors to report.  Every error of confidence
      >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        # Do not leak one file handle per linted file.
        source_file.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for index, text in enumerate(lines):
      if text.endswith('\r'):
        lines[index] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
2951
2952
def PrintUsage(message):
  """Writes the usage text to stderr and exits the program.

  With a message, the process exits with a FATAL ERROR line built from it;
  without one, it exits with status 1.

  Args:
    message: The optional error message.
  """
  sys.stderr.write(_USAGE)
  exit_argument = 1
  if message:
    exit_argument = '\nFATAL ERROR: ' + message
  sys.exit(exit_argument)
2964
2965
def PrintCategories():
  """Writes every error category to stderr and exits with status 0.

  These are the categories that --filter accepts for selecting messages.
  """
  category_listing = _ERROR_CATEGORIES
  sys.stderr.write(category_listing)
  sys.exit(0)
2973
2974
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format, verbosity level, filters and counting style
  as side-effects.  Invalid options or option values end the program through
  PrintUsage; an empty --filter ends it through PrintCategories.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings so that unspecified options keep them.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a bad value as a usage error rather than an uncaught
      # ValueError traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity should be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
3025
3026
def main():
  """Lints every file named on the command line and exits with the result.

  The value handed to sys.exit() is true when at least one error was counted,
  which makes the exit status 1 in that case and 0 otherwise.
  """
  files_to_lint = ParseArguments(sys.argv[1:])

  # Wrap stderr in a UTF-8 stream that uses the 'replace' error handler, so
  # that printing something containing non-ASCII characters does not kill
  # the run.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in files_to_lint:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)
3043
3044
# Run the linter only when this file is executed as a script, so importing it
# as a module does not start a lint run.
if __name__ == '__main__':
  main()