blob: bc46f09709687637a9da631105c1c564052bb314 [file] [log] [blame]
erg@google.com4e00b9a2009-01-12 23:05:11 +00001#!/usr/bin/python2.4
2#
erg@google.com969161c2009-06-26 22:06:46 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool. If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38# Suggestions
39# -----------
40# - Check for no 'explicit' for multi-arg ctor
41# - Check for boolean assign RHS in parens
42# - Check for ctor initializer-list colon position and spacing
43# - Check that if there's a ctor, there should be a dtor
44# - Check accessors that return non-pointer member variables are
45# declared const
46# - Check accessors that return non-const pointer member vars are
47# *not* declared const
48# - Check for using public includes for testing
49# - Check for spaces between brackets in one-line inline method
50# - Check for no assert()
51# - Check for spaces surrounding operators
52# - Check for 0 in pointer context (should be NULL)
53# - Check for 0 in char context (should be '\0')
54# - Check for camel-case method name conventions for methods
55# that are not simple inline getters and setters
56# - Check that base classes have virtual destructors
57# put " // namespace" after } that closes a namespace, with
58# namespace's name after 'namespace' if it is named.
59# - Do not indent namespace contents
60# - Avoid inlining non-trivial constructors in header files
61# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
62# - Check for old-school (void) cast for call-sites of functions
63# ignored return value
64# - Check gUnit usage of anonymous namespace
65# - Check for class declaration order (typedefs, consts, enums,
66# ctor(s?), dtor, friend declarations, methods, member vars)
67#
68
69"""Does google-lint on c++ files.
70
71The goal of this script is to identify places in the code that *may*
72be in non-compliance with google style. It does not attempt to fix
73up these problems -- the point is to educate. It does also not
74attempt to find all problems, or to ensure that everything it does
75find is legitimately a problem.
76
77In particular, we can get very confused by /* and // inside strings!
78We do a small hack, which is to ignore //'s with "'s after them on the
79same line, but it is far from perfect (in either direction).
80"""
81
82import codecs
83import getopt
84import math # for log
85import os
86import re
87import sre_compile
88import string
89import sys
90import unicodedata
91
92
93_USAGE = """
94Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
95 <file> [file] ...
96
97 The style guidelines this tries to follow are those in
98 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
99
100 Every problem is given a confidence score from 1-5, with 5 meaning we are
101 certain of the problem, and 1 meaning it could be a legitimate construct.
102 This will miss some errors, and is not a substitute for a code review.
103
104 To prevent specific lines from being linted, add a '// NOLINT' comment to the
105 end of the line.
106
107 The files passed in will be linted; at least one file must be provided.
108 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
109
110 Flags:
111
112 output=vs7
113 By default, the output is formatted to ease emacs parsing. Visual Studio
114 compatible output (vs7) may also be used. Other formats are unsupported.
115
116 verbose=#
117 Specify a number 0-5 to restrict errors to certain verbosity levels.
118
119 filter=-x,+y,...
120 Specify a comma-separated list of category-filters to apply: only
121 error messages whose category names pass the filters will be printed.
122 (Category names are printed with the message and look like
123 "[whitespace/indent]".) Filters are evaluated left to right.
124 "-FOO" and "FOO" means "do not print categories that start with FOO".
125 "+FOO" means "do print categories that start with FOO".
126
127 Examples: --filter=-whitespace,+whitespace/braces
128 --filter=whitespace,runtime/printf,+runtime/printf_format
129 --filter=-,+build/include_what_you_use
130
131 To see a list of all the categories used in cpplint, pass no arg:
132 --filter=
133"""
134
135# We categorize each error message we print. Here are the categories.
136# We want an explicit list so we can list them all in cpplint --filter=.
137# If you add a new error message with a new category, add it to the list
138# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.coma87abb82009-02-24 01:41:01 +0000139# \ used for clearer layout -- pylint: disable-msg=C6013
140_ERROR_CATEGORIES = '''\
erg@google.com4e00b9a2009-01-12 23:05:11 +0000141 build/class
142 build/deprecated
143 build/endif_comment
144 build/forward_decl
145 build/header_guard
146 build/include
147 build/include_order
148 build/include_what_you_use
149 build/namespaces
150 build/printf_format
151 build/storage_class
152 legal/copyright
153 readability/braces
154 readability/casting
155 readability/check
156 readability/constructors
157 readability/fn_size
158 readability/function
159 readability/multiline_comment
160 readability/multiline_string
161 readability/streams
162 readability/todo
163 readability/utf8
164 runtime/arrays
165 runtime/casting
166 runtime/explicit
167 runtime/int
168 runtime/init
erg@google.com36649102009-03-25 21:18:36 +0000169 runtime/invalid_increment
erg@google.com4e00b9a2009-01-12 23:05:11 +0000170 runtime/memset
171 runtime/printf
172 runtime/printf_format
173 runtime/references
174 runtime/rtti
175 runtime/sizeof
176 runtime/string
177 runtime/threadsafe_fn
178 runtime/virtual
179 whitespace/blank_line
180 whitespace/braces
181 whitespace/comma
182 whitespace/comments
183 whitespace/end_of_line
184 whitespace/ending_newline
185 whitespace/indent
186 whitespace/labels
187 whitespace/line_length
188 whitespace/newline
189 whitespace/operators
190 whitespace/parens
191 whitespace/semicolon
192 whitespace/tab
193 whitespace/todo
erg@google.coma87abb82009-02-24 01:41:01 +0000194'''
erg@google.com4e00b9a2009-01-12 23:05:11 +0000195
# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = []
201
erg@google.com4e00b9a2009-01-12 23:05:11 +0000202# We used to check for high-bit characters, but after much discussion we
203# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
205
# Headers that we consider STL headers.
# Stored as a frozenset of header names exactly as they would appear inside
# an #include (some with a '.h' suffix, some without).
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
215
216
# Non-STL C++ system headers.
# NOTE(review): 'exception' is listed both here and in _STL_HEADERS; confirm
# that the overlap is intentional.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
233
234
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
# The names in this list are also the keys of _CHECK_REPLACEMENT.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]
245
# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
# Maps each macro in _CHECK_MACROS to a dict keyed by comparison operator;
# the value is the name of the comparison-specific macro to suggest instead.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Macros that assert a condition is true map an operator to the macro of
# the same meaning (e.g. '==' -> ..._EQ).
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Macros that assert a condition is false map an operator to the macro for
# the inverted comparison (e.g. '==' -> ..._NE).
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
266
267
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
# They are also the keys of _IncludeState._TYPE_NAMES, which supplies the
# human-readable name of each type for error messages.
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5
275
276
# Maps a regexp pattern string to its compiled pattern object.  Filled
# lazily by Match() and Search().
_regexp_compile_cache = {}


def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: The regular expression, as a string.
    s: The string to match against.

  Returns:
    The match object if the pattern matches at the start of s, else None.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the internal sre_compile module.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)


def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: The regular expression, as a string.
    s: The string to search.

  Returns:
    The match object for the first location where the pattern matches,
    else None.
  """
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
295
296
class _IncludeState(dict):
  """Records where headers were included and validates their ordering.

  The dict part of an _IncludeState maps an include filename to the line
  number on which that file was included.

  CheckNextIncludeOrder() is meant to be called once for each header in the
  file, with one of the header type constants defined above.  A header that
  appears in an illegal position is reported through a non-empty return
  value.
  """
  # self._section only ever advances through these values.  When a header
  # would require moving backwards, CheckNextIncludeOrder reports an error.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    self._section = self._INITIAL_SECTION

  def CheckNextIncludeOrder(self, header_type):
    """Checks one header against the sections seen so far.

    The internal section state is advanced so that the next include can be
    checked.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      '' when the header is in an acceptable position, otherwise a message
      describing what is wrong.
    """
    # Built up front, while self._section still names the section that
    # precedes this header.
    out_of_order = ('Found %s after %s' %
                    (self._TYPE_NAMES[header_type],
                     self._SECTION_NAMES[self._section]))

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted_section = self._C_SECTION
      else:
        wanted_section = self._CPP_SECTION
      # System headers may not appear after a later section has started.
      if self._section > wanted_section:
        return out_of_order
      self._section = wanted_section
      return ''

    if header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # Never an error: once past the "my header" section, the header is
      # simply treated as an 'other' header.
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
      return ''

    assert header_type == _OTHER_HEADER
    self._section = self._OTHER_H_SECTION
    return ''
380
381
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
                  self.filters is left unchanged in that case.
    """
    # Default filters always have less priority than the flag ones.
    new_filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        new_filters.append(clean_filt)
    # Validate before committing, so that a rejected filter string never
    # leaves a malformed entry in self.filters.
    for filt in new_filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    self.filters = new_filters

  def ResetErrorCount(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0

  def IncrementErrorCount(self):
    """Bumps the module's error statistic."""
    self.error_count += 1
438
439
# The single module-wide _CppLintState instance; the module-level accessor
# functions read and update it.
_cpplint_state = _CppLintState()
441
442
def _OutputFormat():
  """Returns the output format stored in the module-wide lint state."""
  state = _cpplint_state
  return state.output_format
446
447
def _SetOutputFormat(output_format):
  """Stores output_format as the output format of the module-wide state."""
  state = _cpplint_state
  state.SetOutputFormat(output_format)
451
452
def _VerboseLevel():
  """Returns the verbosity level stored in the module-wide lint state."""
  state = _cpplint_state
  return state.verbose_level
456
457
def _SetVerboseLevel(level):
  """Changes the module's verbosity and hands back the previous setting."""
  previous_level = _cpplint_state.SetVerboseLevel(level)
  return previous_level
461
462
def _Filters():
  """Returns the list of output filters held by the module-wide state."""
  state = _cpplint_state
  return state.filters
466
467
def _SetFilters(filters):
  """Replaces the error-message filters of the module-wide state.

  The filters decide whether a given error message is emitted.

  Args:
    filters: A string of comma-separated filters (eg "whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  state = _cpplint_state
  state.SetFilters(filters)
479
480
class _FunctionState(object):
  """Remembers the function being scanned and how many lines its body has."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # base threshold for functions that look like tests.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Starts tracking a new function body, resetting the line count.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current body; does nothing outside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports the current function if its body has too many lines.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # Functions whose name starts with TEST or Test get the larger base.
    base_trigger = self._NORMAL_TRIGGER
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    # The threshold doubles with every verbosity level.
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # Confidence is the base-2 logarithm of how many times the base
    # threshold fits into the body, capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).' % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stops analyzing the function body."""
    self.in_a_function = False
535
536
class _IncludeError(Exception):
  """Signals that the includes of a file are in a problematic order."""
540
541
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with Windows separators made Unix-like."""
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected SVN or git checkout root, or the
      full name when the file does not exist or no checkout is detected.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # one_up_dir == root_dir only at the filesystem root; stop there so
        # that a '.svn' entry at the root cannot make this loop forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN? Try to find a git top level directory by searching up from the
      # current path.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not os.path.exists(os.path.join(root_dir, ".git"))):
        root_dir = os.path.dirname(root_dir)
      if os.path.exists(os.path.join(root_dir, ".git")):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
625
626
def _ShouldPrintError(category, confidence):
  """Returns true iff confidence >= verbose, and category passes filter."""
  # An error can be suppressed in two ways: its confidence is below the
  # verbosity level, or the filters exclude its category.
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order; the last one whose prefix matches the
  # category decides.
  suppressed = False
  for rule in _Filters():
    sign, prefix = rule[:1], rule[1:]
    if sign == '-' or sign == '+':
      if category.startswith(prefix):
        suppressed = (sign == '-')
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
648
649
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Besides the location of the error we report our confidence in it, that
  is, how certain we are this is a legitimate style regression, and not a
  misidentification or a use that's sometimes justified.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime". Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  # Nothing is printed or counted when the verbosity level or the filters
  # rule the message out.
  if not _ShouldPrintError(category, confidence):
    return

  _cpplint_state.IncrementErrorCount()
  if _cpplint_state.output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
678
679
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches multi-line C++ comments.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of space removals tools so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
# (Compiled with re.VERBOSE, so the layout whitespace inside the pattern is
# not part of the expression.)
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
700
701
def IsCppString(line):
  """Tells whether text appended right after `line` would be inside a string.

  Neither single-line nor multi-line comments are taken into account.

  Args:
    line: A partial line of code, starting at column 0.

  Returns:
    True if the next character appended to 'line' would be inside a
    string constant, False otherwise.
  """
  # Neutralize escaped backslashes first, so that \\" is not read as \".
  unescaped = line.replace(r'\\', 'XX')
  all_quotes = unescaped.count('"')
  escaped_quotes = unescaped.count(r'\"')
  quote_char_literals = unescaped.count("'\"'")
  # An odd number of remaining quotes means a string is still open.
  open_quotes = all_quotes - escaped_quotes - quote_char_literals
  return open_quotes % 2 == 1
717
718
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  Only lines whose stripped text starts with '/*' and that do not close the
  comment themselves are considered.  len(lines) is returned when there is
  no such line at or after lineix.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # A '*/' later on the same line means the comment does not go beyond
    # this line, so it is not the marker we are looking for.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
728
729
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the line that ends the comment we are inside.

  That is the first line at or after lineix whose stripped text ends with
  '*/'; len(lines) is returned when there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
737
738
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with a placeholder comment."""
  # A '// dummy' comment keeps each line non-empty, so the cleared lines do
  # not cause unnecessary blank line warnings later in the code.
  placeholder = '// dummy'
  index = begin
  while index < end:
    lines[index] = placeholder
    index += 1
745
746
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multiline (c-style) comment in lines, in place.

  Args:
    filename: The name of the current file, used when reporting.
    lines: The list of lines to process; it is modified in place.
    error: The function to call with any errors found.
  """
  cursor = 0
  while cursor < len(lines):
    comment_begin = FindNextMultiLineCommentStart(lines, cursor)
    if comment_begin >= len(lines):
      # No further multi-line comment starts.
      return
    comment_end = FindNextMultiLineCommentEnd(lines, comment_begin)
    if comment_end >= len(lines):
      # The comment never closes; report it at its (1-based) starting line.
      error(filename, comment_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    # Resume scanning on the line after the comment's last line.
    cursor = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_begin, cursor)
761
762
def CleanseComments(line):
  """Strips a //-comment and single-line C-style /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_at = line.find('//')
  if slash_at >= 0:
    before_slashes = line[:slash_at]
    # Keep the '//' when it sits inside a string constant.
    if not IsCppString(before_slashes):
      line = before_slashes
  # Finally drop any /* ... */ that opens and closes on this line.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
777
778
class CleansedLines(object):
  """Keeps three parallel versions of the lines of a file.

  1) elided holds the lines with strings collapsed and comments removed,
  2) lines holds the lines with comments removed, and
  3) raw_lines holds the lines exactly as they were passed in.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings; a line matching _RE_PATTERN_INCLUDE
      is returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Escaped characters go first, which makes collapsing the quotes basic.
    # Things that look like escaped characters shouldn't occur outside of
    # strings and chars.
    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    return elided
822
823
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.  Only
  brackets of the same kind as the opening one are tracked, and scanning
  starts at pos, so brackets earlier on the starting line are ignored.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close (or if pos does not
    point at an opening bracket).  Note we ignore strings and comments when
    matching; on success the line we return is the 'cleansed' line at the
    returned linenum, on failure it is the last line examined.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  closers = {'(': ')', '[': ']', '{': '}'}
  if startchar not in closers:
    return (line, clean_lines.NumLines(), -1)
  endchar = closers[startchar]

  num_lines = clean_lines.NumLines()
  depth = 0      # nesting depth of startchar/endchar pairs seen so far
  start = pos    # on the first line scanning begins at the opening bracket
  while linenum < num_lines:
    line = clean_lines.elided[linenum]
    for index in range(start, len(line)):
      char = line[index]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          # This endchar balances the bracket we started from.
          return (line, linenum, index + 1)
    linenum += 1
    start = 0
  # Ran off the end of the file without closing the expression.
  return (line, num_lines, -1)
861
862
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # lines[0] is a dummy entry, so the candidates are lines 1 through 10.
  for text in lines[1:11]:
    if re.search(r'Copyright', text, re.I):
      return
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
874
875
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is the file's repository name, upper-cased, with every
  '-', '.', '/' and whitespace character replaced by '_' and one more '_'
  appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.
  """
  repository_name = FileInfo(filename).RepositoryName()
  guard_stem = re.sub(r'[-./\s]', '_', repository_name)
  return guard_stem.upper() + '_'
890
891
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used, both on the #ifndef line
  and in the comment on the closing #endif line.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    linesplit = line.split()
    if len(linesplit) >= 2:
      # find the first occurrence of #ifndef and #define, save arg
      if not ifndef and linesplit[0] == '#ifndef':
        # set ifndef to the header guard presented on the #ifndef line.
        ifndef = linesplit[1]
        ifndef_linenum = linenum
      if not define and linesplit[0] == '#define':
        define = linesplit[1]
    # find the last occurrence of #endif, save entire line
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef or not define or ifndef != define:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility.  The legacy spelling is still reported,
  # but only at error level 0.
  if ifndef != cppvar:
    error_level = 0
    if ifndef != cppvar + '_':
      error_level = 5

    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  # Two spaces before '//' so that the line this check demands also
  # satisfies the whitespace/comments check ("at least two spaces ...
  # between code and comments").
  if endif != ('#endif  // %s' % cppvar):
    error_level = 0
    if endif != ('#endif  // %s' % (cppvar + '_')):
      error_level = 5

    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % cppvar)
949
950
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Logs an error for each line containing Unicode replacement characters.

  A replacement character (U+FFFD) means that either the file contained
  invalid UTF-8 (likely) or literal replacement characters (which it
  shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  for linenum, text in enumerate(lines):
    if replacement_char not in text:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
968
969
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The lines array was built by adding two newlines to the original file
  # and then splitting on \n.  A file that ends in \n therefore yields an
  # empty last-but-one element; anything else means the newline is missing.
  if len(lines) >= 3 and not lines[-2]:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
986
987
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line.  Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary.  We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop every \\ (escaped backslash) first.  They are harmless, but the
  # second backslash of the pair could otherwise be read as escaping a
  # following quote when \" is counted below.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1024
1025
# Pairs of (thread-unsafe function, thread-safe replacement).  Each name
# ends in '(' so that only calls are matched.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of a listed name on the line is examined, so a call
  such as "rand()" is still found when it follows "srand(1);".  At most
  one error is reported per listed function per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # A hit only counts as a call to the unsafe function when it is not
      # the tail of a longer identifier (my_rand, srand) and not a member
      # access (obj.rand, ptr->rand).
      # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1068
1069
# Matches an invalid increment such as "*count++;", which moves the pointer
# instead of incrementing the value it points to.
_RE_PATTERN_IVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if not _RE_PATTERN_IVALID_INCREMENT.match(statement):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1096
1097
erg@google.com4e00b9a2009-01-12 23:05:11 +00001098class _ClassInfo(object):
1099 """Stores information about a class."""
1100
1101 def __init__(self, name, linenum):
1102 self.name = name
1103 self.linenum = linenum
1104 self.seen_open_brace = False
1105 self.is_derived = False
1106 self.virtual_method_linenumber = None
1107 self.has_virtual_destructor = False
1108 self.brace_depth = 0
1109
1110
1111class _ClassState(object):
1112 """Holds the current state of the parse relating to class declarations.
1113
1114 It maintains a stack of _ClassInfos representing the parser's guess
1115 as to the current nesting of class declarations. The innermost class
1116 is at the top (back) of the stack. Typically, the stack will either
1117 be empty or have exactly one entry.
1118 """
1119
1120 def __init__(self):
1121 self.classinfo_stack = []
1122
1123 def CheckFinished(self, filename, error):
1124 """Checks that all classes have been completely parsed.
1125
1126 Call this when all lines in a file have been processed.
1127 Args:
1128 filename: The name of the current file.
1129 error: The function to call with any errors found.
1130 """
1131 if self.classinfo_stack:
1132 # Note: This test can result in false positives if #ifdef constructs
1133 # get in the way of brace matching. See the testBuildClass test in
1134 # cpplint_unittest.py for an example of this.
1135 error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
1136 'Failed to find complete declaration of class %s' %
1137 self.classinfo_stack[0].name)
1138
1139
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations as it
  is very convenient to do so while checking for gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported; it is called here with
           filename, line number, category, error level, and message.
  """

  # The printf and escape checks need string contents, so they run on the
  # line with comments removed but strings still in place.
  code = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', code):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', code):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Escaped backslashes are removed first so that their second backslash
  # is not taken for the start of an undefined escape.
  code = code.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', code):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Everything from here on works with both comments and strings removed.
  line = clean_lines.elided[linenum]

  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  stack = class_state.classinfo_stack

  # A class or struct declaration on this line opens a new stack entry.
  declaration = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if declaration:
    stack.append(_ClassInfo(declaration.group(3), linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not stack:
    return

  classinfo = stack[-1]

  # Until the opening brace shows up, look for it and for parent class
  # declarations.
  if not classinfo.seen_open_brace:
    # A ';' before any '{' is taken to mean a forward declaration or a
    # single-line class declaration, neither of which is processed.
    if ';' in line:
      stack.pop()
      return
    classinfo.seen_open_brace = '{' in line
    # A bare ':' (not part of '::') introduces a base class list.
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]
  escaped_classname = re.escape(base_classname)

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.  A lone 'void'
  # argument and copy constructors are exempt.
  constructor = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                      % escaped_classname,
                      line)
  if constructor:
    sole_argument = constructor.group(1)
    if (sole_argument != 'void' and
        not Match(r'(const\s+)?%s\s*&' % escaped_classname,
                  sole_argument.strip())):
      error(filename, linenum, 'runtime/explicit', 5,
            'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  depth = classinfo.brace_depth + line.count('{') - line.count('}')
  if depth > 0:
    classinfo.brace_depth = depth
    return

  finished = stack.pop()
  # Try to detect missing virtual destructor declarations.
  # For now, only warn if a non-derived class with virtual methods lacks
  # a virtual destructor. This is to make it less likely that people will
  # declare derived virtual destructors without declaring the base
  # destructor virtual.
  if (finished.virtual_method_linenumber is not None and
      not finished.has_virtual_destructor and
      not finished.is_derived):  # Only warn for base classes
    error(filename, finished.linenum, 'runtime/virtual', 4,
          'The class %s probably needs a virtual destructor due to '
          'having virtual method(s), one declared at line %d.'
          % (finished.name, finished.virtual_method_linenumber))
1290
1291
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Since function calls often occur inside if/for/while/switch
  # expressions - which have their own, more liberal conventions - we
  # first see if we should be looking inside such an expression for a
  # function call, to which we can apply more strict standards.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line    # if there's no control flow construct, look at whole line
  for pattern in control_patterns:
    control = Search(pattern, line)
    if control:
      fncall = control.group(1)    # look inside the parens for function calls
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.

  # Ignore control structures.
  if Search(r'\b(if|for|while|switch|return|delete)\b', fncall):
    return
  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  if (Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
1350
1351
def IsBlankLine(line):
  """Returns true if the given line is blank.

  A line counts as blank when it is empty or holds nothing but white
  space.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1365
1366
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  raw_line = clean_lines.raw_lines[linenum]

  # Does this line look like the start of a function?
  # The pattern covers: decls * & space::name( ...
  starting_func = False
  declaration = Match(r'(\w(\w|::|\*|\&|\s)*)\(', line)
  if declaration:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = declaration.group(1).split()[-1]
    if (function_name in ('TEST', 'TEST_F') or
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    # Scan forward from the declaration for the first ';', '}' or '{'.
    joined_line = ''
    body_found = False
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      if Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameters = Search(r'(\(.*\))', joined_line)
          if parameters:                   # Ignore bad syntax
            function += parameters.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    if not Search(r'\bNOLINT\b', raw_line):
      function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1436
1437
# Matches a comment that begins with TODO.  Group 1 is the whitespace
# between '//' and TODO, group 2 the optional '(username)', and group 3
# what follows the optional colon: one whitespace character, the empty
# string at end of comment, or None when neither is present.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return
  leading_whitespace, username, middle_whitespace = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(leading_whitespace) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not username:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # None (the group did not match at all) must be reported, so the
  # comparison is against the two accepted values rather than a
  # truthiness test.
  if middle_whitespace not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1469
1470
1471def CheckSpacing(filename, clean_lines, linenum, error):
1472 """Checks for the correctness of various spacing issues in the code.
1473
1474 Things we check for: spaces around operators, spaces after
1475 if/for/while/switch, no spaces around parens in function calls, two
1476 spaces between code and comment, don't start a block with a blank
1477 line, don't end a function with a blank line, don't have too many
1478 blank lines in a row.
1479
1480 Args:
1481 filename: The name of the current file.
1482 clean_lines: A CleansedLines instance containing the file.
1483 linenum: The number of the line to check.
1484 error: The function to call with any errors found.
1485 """
1486
1487 raw = clean_lines.raw_lines
1488 line = raw[linenum]
1489
1490 # Before nixing comments, check if the line is blank for no good
1491 # reason. This includes the first line after a block is opened, and
1492 # blank lines at the end of a function (ie, right before a line like '}'
1493 if IsBlankLine(line):
1494 elided = clean_lines.elided
1495 prev_line = elided[linenum - 1]
1496 prevbrace = prev_line.rfind('{')
1497 # TODO(unknown): Don't complain if line before blank line, and line after,
1498 # both start with alnums and are indented the same amount.
1499 # This ignores whitespace at the start of a namespace block
1500 # because those are not usually indented.
1501 if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
1502 and prev_line[:prevbrace].find('namespace') == -1):
1503 # OK, we have a blank line at the start of a code block. Before we
1504 # complain, we check if it is an exception to the rule: The previous
1505 # non-empty line has the paramters of a function header that are indented
1506 # 4 spaces (because they did not fit in a 80 column line when placed on
1507 # the same line as the function name). We also check for the case where
1508 # the previous line is indented 6 spaces, which may happen when the
1509 # initializers of a constructor do not fit into a 80 column line.
1510 exception = False
1511 if Match(r' {6}\w', prev_line): # Initializer list?
1512 # We are looking for the opening column of initializer list, which
1513 # should be indented 4 spaces to cause 6 space indentation afterwards.
1514 search_position = linenum-2
1515 while (search_position >= 0
1516 and Match(r' {6}\w', elided[search_position])):
1517 search_position -= 1
1518 exception = (search_position >= 0
1519 and elided[search_position][:5] == ' :')
1520 else:
1521 # Search for the function arguments or an initializer list. We use a
1522 # simple heuristic here: If the line is indented 4 spaces; and we have a
1523 # closing paren, without the opening paren, followed by an opening brace
1524 # or colon (for initializer lists) we assume that it is the last line of
1525 # a function header. If we have a colon indented 4 spaces, it is an
1526 # initializer list.
1527 exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
1528 prev_line)
1529 or Match(r' {4}:', prev_line))
1530
1531 if not exception:
1532 error(filename, linenum, 'whitespace/blank_line', 2,
1533 'Blank line at the start of a code block. Is this needed?')
1534 # This doesn't ignore whitespace at the end of a namespace block
1535 # because that is too hard without pairing open/close braces;
1536 # however, a special exception is made for namespace closing
1537 # brackets which have a comment containing "namespace".
1538 #
1539 # Also, ignore blank lines at the end of a block in a long if-else
1540 # chain, like this:
1541 # if (condition1) {
1542 # // Something followed by a blank line
1543 #
1544 # } else if (condition2) {
1545 # // Something else
1546 # }
1547 if linenum + 1 < clean_lines.NumLines():
1548 next_line = raw[linenum + 1]
1549 if (next_line
1550 and Match(r'\s*}', next_line)
1551 and next_line.find('namespace') == -1
1552 and next_line.find('} else ') == -1):
1553 error(filename, linenum, 'whitespace/blank_line', 3,
1554 'Blank line at the end of a code block. Is this needed?')
1555
1556 # Next, we complain if there's a comment too near the text
1557 commentpos = line.find('//')
1558 if commentpos != -1:
1559 # Check if the // may be in quotes. If so, ignore it
erg@google.coma87abb82009-02-24 01:41:01 +00001560 # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com4e00b9a2009-01-12 23:05:11 +00001561 if (line.count('"', 0, commentpos) -
1562 line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
1563 # Allow one space for new scopes, two spaces otherwise:
1564 if (not Match(r'^\s*{ //', line) and
1565 ((commentpos >= 1 and
1566 line[commentpos-1] not in string.whitespace) or
1567 (commentpos >= 2 and
1568 line[commentpos-2] not in string.whitespace))):
1569 error(filename, linenum, 'whitespace/comments', 2,
1570 'At least two spaces is best between code and comments')
1571 # There should always be a space between the // and the comment
1572 commentend = commentpos + 2
1573 if commentend < len(line) and not line[commentend] == ' ':
1574 # but some lines are exceptions -- e.g. if they're big
1575 # comment delimiters like:
1576 # //----------------------------------------------------------
erg@google.come35f7652009-06-19 20:52:09 +00001577 # or they begin with multiple slashes followed by a space:
1578 # //////// Header comment
1579 match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
1580 Search(r'^/+ ', line[commentend:]))
erg@google.com4e00b9a2009-01-12 23:05:11 +00001581 if not match:
1582 error(filename, linenum, 'whitespace/comments', 4,
1583 'Should have a space between // and comment')
1584 CheckComment(line[commentpos:], filename, linenum, error)
1585
1586 line = clean_lines.elided[linenum] # get rid of comments and strings
1587
1588 # Don't try to do spacing checks for operator methods
1589 line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
1590
1591 # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
1592 # Otherwise not. Note we only check for non-spaces on *both* sides;
1593 # sometimes people put non-spaces on one side when aligning ='s among
1594 # many lines (not that this is behavior that I approve of...)
1595 if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
1596 error(filename, linenum, 'whitespace/operators', 4,
1597 'Missing spaces around =')
1598
1599 # It's ok not to have spaces around binary operators like + - * /, but if
1600 # there's too little whitespace, we get concerned. It's hard to tell,
1601 # though, so we punt on this one for now. TODO.
1602
1603 # You should always have whitespace around binary operators.
1604 # Alas, we can't test < or > because they're legitimately used sans spaces
1605 # (a->b, vector<int> a). The only time we can tell is a < with no >, and
1606 # only if it's not template params list spilling into the next line.
1607 match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
1608 if not match:
1609 # Note that while it seems that the '<[^<]*' term in the following
1610 # regexp could be simplified to '<.*', which would indeed match
1611 # the same class of strings, the [^<] means that searching for the
1612 # regexp takes linear rather than quadratic time.
1613 if not Search(r'<[^<]*,\s*$', line): # template params spill
1614 match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
1615 if match:
1616 error(filename, linenum, 'whitespace/operators', 3,
1617 'Missing spaces around %s' % match.group(1))
1618 # We allow no-spaces around << and >> when used like this: 10<<20, but
1619 # not otherwise (particularly, not when used as streams)
1620 match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
1621 if match:
1622 error(filename, linenum, 'whitespace/operators', 3,
1623 'Missing spaces around %s' % match.group(1))
1624
1625 # There shouldn't be space around unary operators
1626 match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
1627 if match:
1628 error(filename, linenum, 'whitespace/operators', 4,
1629 'Extra space for operator %s' % match.group(1))
1630
1631 # A pet peeve of mine: no spaces after an if, while, switch, or for
1632 match = Search(r' (if\(|for\(|while\(|switch\()', line)
1633 if match:
1634 error(filename, linenum, 'whitespace/parens', 5,
1635 'Missing space before ( in %s' % match.group(1))
1636
1637 # For if/for/while/switch, the left and right parens should be
1638 # consistent about how many spaces are inside the parens, and
1639 # there should either be zero or one spaces inside the parens.
1640 # We don't want: "if ( foo)" or "if ( foo )".
erg@google.come35f7652009-06-19 20:52:09 +00001641 # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com4e00b9a2009-01-12 23:05:11 +00001642 match = Search(r'\b(if|for|while|switch)\s*'
1643 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
1644 line)
1645 if match:
1646 if len(match.group(2)) != len(match.group(4)):
1647 if not (match.group(3) == ';' and
erg@google.come35f7652009-06-19 20:52:09 +00001648 len(match.group(2)) == 1 + len(match.group(4)) or
1649 not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
erg@google.com4e00b9a2009-01-12 23:05:11 +00001650 error(filename, linenum, 'whitespace/parens', 5,
1651 'Mismatching spaces inside () in %s' % match.group(1))
1652 if not len(match.group(2)) in [0, 1]:
1653 error(filename, linenum, 'whitespace/parens', 5,
1654 'Should have zero or one spaces inside ( and ) in %s' %
1655 match.group(1))
1656
1657 # You should always have a space after a comma (either as fn arg or operator)
1658 if Search(r',[^\s]', line):
1659 error(filename, linenum, 'whitespace/comma', 3,
1660 'Missing space after ,')
1661
1662 # Next we will look for issues with function calls.
1663 CheckSpacingForFunctionCall(filename, line, linenum, error)
1664
1665 # Except after an opening paren, you should have spaces before your braces.
1666 # And since you should never have braces at the beginning of a line, this is
1667 # an easy test.
1668 if Search(r'[^ (]{', line):
1669 error(filename, linenum, 'whitespace/braces', 5,
1670 'Missing space before {')
1671
1672 # Make sure '} else {' has spaces.
1673 if Search(r'}else', line):
1674 error(filename, linenum, 'whitespace/braces', 5,
1675 'Missing space before else')
1676
1677 # You shouldn't have spaces before your brackets, except maybe after
1678 # 'delete []' or 'new char * []'.
1679 if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
1680 error(filename, linenum, 'whitespace/braces', 5,
1681 'Extra space before [')
1682
1683 # You shouldn't have a space before a semicolon at the end of the line.
1684 # There's a special case for "for" since the style guide allows space before
1685 # the semicolon there.
1686 if Search(r':\s*;\s*$', line):
1687 error(filename, linenum, 'whitespace/semicolon', 5,
1688 'Semicolon defining empty statement. Use { } instead.')
1689 elif Search(r'^\s*;\s*$', line):
1690 error(filename, linenum, 'whitespace/semicolon', 5,
1691 'Line contains only semicolon. If this should be an empty statement, '
1692 'use { } instead.')
1693 elif (Search(r'\s+;\s*$', line) and
1694 not Search(r'\bfor\b', line)):
1695 error(filename, linenum, 'whitespace/semicolon', 5,
1696 'Extra space before last semicolon. If this should be an empty '
1697 'statement, use { } instead.')
1698
1699
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line above the given line.

  Walks backwards through clean_lines.elided, starting just above linenum,
  and stops at the first entry that IsBlankLine does not report as blank.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line whose predecessor is wanted.

  Returns:
    A (contents, line number) tuple describing the nearest preceding
    non-blank line.  When there is no such line the tuple is ('', -1).
  """
  index = linenum
  while index > 0:
    index -= 1
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  # Ran off the top of the file without finding any content.
  return ('', -1)
1721
1722
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Works on the elided form of the line and reports, through error():
    * a '{' alone on a line that does not look like a new scope,
    * an 'else' that is not on the same line as the preceding '}',
    * an else with a brace on only one side,
    * an else or do body placed on the same line as the keyword,
    * a ';' following a closing '}' outside struct/class/enum/initializers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):  # could be multi-line if
      # Find the ( after the if, then look for a brace after its matching ).
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:  # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:  # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  # Prepend earlier lines while the statement seems to have started above.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    if prevlinenum < 0:
      # No earlier non-blank line exists.  Without this guard the lookup
      # keeps returning ('', -1), 'line' never changes, and the loop would
      # spin forever on an indented '{ ... };' with nothing above it.
      break
    if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1795
1796
def ReplaceableCheck(operator, macro, line):
  """Decide whether a plain CHECK could use a comparison-specific macro.

  For example CHECK(a == 42) can become CHECK_EQ(a, 42); the same applies
  to CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """

  # Decimal and hex integers, strings, and chars (in that order).
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only flag comparisons where one side looks like a literal, since
  # CHECK(x == iterator) won't compile once rewritten.  That misses some
  # legitimate candidates, but it avoids a lot of extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = ''.join([r'\s*', macro, r'\((',
                     literal_on_left, '|', literal_on_right,
                     ')'])

  found = Match(pattern, line)
  if not found:
    return found

  # Don't complain about CHECK(x == NULL) or similar because
  # CHECK_EQ(x, NULL) won't compile (requires a cast).  Also leave alone
  # compound boolean expressions such as CHECK(a == b || c == d).
  return not Search(r'NULL|&&|\|\|', line)
1830
1831
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  Suggests the comparison-specific variant when a plain macro wraps a
  simple comparison that ReplaceableCheck accepts.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Pick the first known macro that occurs anywhere in the raw line.
  raw_text = clean_lines.raw_lines[linenum]
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_text:
      current_macro = candidate
      break
  if not current_macro:
    # Nothing to do: the line mentions none of the macros.
    return

  code = clean_lines.elided[linenum]  # comments and strings stripped

  # Report at most one suggestion per line: the first operator that fits.
  for operator in ['==', '!=', '>=', '>', '<=', '<']:
    if not ReplaceableCheck(operator, current_macro, code):
      continue
    suggestion = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              suggestion, current_macro, operator))
    break
1863
1864
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Text strings are normalized to NFC and measured per character: wide and
  fullwidth East Asian characters count as two columns, combining
  characters count as zero, everything else counts as one.  Any other
  input (for example a byte string) is measured with len().

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  # The 'unicode' builtin does not exist on every interpreter (Python 3
  # dropped it because str is already Unicode text), so look it up
  # defensively instead of failing with NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  for c in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(c) in ('W', 'F'):
      width += 2
    elif not unicodedata.combining(c):
      width += 1
  return width
1885
1886
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  The checks made directly here cover tabs, trailing
  whitespace, odd indentation, unindented labels, line length and multiple
  statements on one line; brace, spacing and CHECK-macro rules are
  delegated to CheckBraces, CheckSpacing and CheckCheck.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]
  cleansed_line = clean_lines.elided[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.  Only literal spaces are
  # counted, so a leading tab ends the run.
  # NOTE: the original author records that a more sophisticated awk-based
  # heuristic exists (skipping deep indents, access specifiers and
  # continuation lines); none of that is implemented here.
  initial_spaces = len(line) - len(line.lstrip(' '))

  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    # A lone "label:" line is the one case where odd indentation is allowed.
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  elif (initial_spaces == 0 and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    # Labels should always be indented at least one space.
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor, '
          'the colon should be on the line after the definition header.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # NOTE(review): the '#endif' form below has a single space before '//',
    # while this file's own comment-spacing check asks for two -- confirm
    # the literal's spacing against the upstream file.
    for guard_format in ('#ifndef %s', '#define %s', '#endif // %s'):
      if line.startswith(guard_format % cppvar):
        is_header_guard = True
        break

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' usually means several statements share the line.
  # for loops are allowed two ;'s (and may run over two lines).
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous_code = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = ('for' in previous_code and
                            ';' not in previous_code)
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
1981
1982
1983_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
1984_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
1985# Matches the first component of a filename delimited by -s and _s. That is:
1986# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
1987# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
1988# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
1989# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
1990_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
1991
1992
1993def _DropCommonSuffixes(filename):
1994 """Drops common suffixes like _test.cc or -inl.h from filename.
1995
1996 For example:
1997 >>> _DropCommonSuffixes('foo/foo-inl.h')
1998 'foo/foo'
1999 >>> _DropCommonSuffixes('foo/bar/foo.cc')
2000 'foo/bar/foo'
2001 >>> _DropCommonSuffixes('foo/foo_internal.h')
2002 'foo/foo'
2003 >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
2004 'foo/foo_unusualinternal'
2005
2006 Args:
2007 filename: The input filename.
2008
2009 Returns:
2010 The filename with the common suffix removed.
2011 """
2012 for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
2013 'inl.h', 'impl.h', 'internal.h'):
2014 if (filename.endswith(suffix) and len(filename) > len(suffix) and
2015 filename[-len(suffix) - 1] in ('-', '_')):
2016 return filename[:-len(suffix) - 1]
2017 return os.path.splitext(filename)[0]
2018
2019
2020def _IsTestFilename(filename):
2021 """Determines if the given filename has a suffix that identifies it as a test.
2022
2023 Args:
2024 filename: The input filename.
2025
2026 Returns:
2027 True if 'filename' looks like a test, False otherwise.
2028 """
2029 if (filename.endswith('_test.cc') or
2030 filename.endswith('_unittest.cc') or
2031 filename.endswith('_regtest.cc')):
2032 return True
2033 else:
2034 return False
2035
2036
def _ClassifyInclude(fileinfo, include, is_system):
  """Works out which header category an #include belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ ones if they appear in
  # either of the known header lists, C ones otherwise.
  if is_system:
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in the same directory (or a sibling 'public' directory),
  # then it's likely to be owned by the target file.
  target_stem = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_stem)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    if (include_dir == target_dir or
        include_dir == os.path.normpath(target_dir + '/../public')):
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2095
2096
erg@google.coma87abb82009-02-24 01:41:01 +00002097
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below needs a parsed include.  The pattern is anchored at the
  # start of the line, so one lookup serves both remaining checks.
  parsed = _RE_PATTERN_INCLUDE.search(line)
  if not parsed:
    return
  include = parsed.group(2)
  is_system = (parsed.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    header_kind = _ClassifyInclude(fileinfo, include, is_system)
    order_problem = include_state.CheckNextIncludeOrder(header_kind)
    if order_problem:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (order_problem, fileinfo.BaseName()))

  # Look for any of the stream classes that are part of standard C++.
  if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
    # Many unit tests use cout, so we exempt them.
    if not _IsTestFilename(filename):
      error(filename, linenum, 'readability/streams', 3,
            'Streams are highly discouraged.')
2162
2163def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
2164 error):
2165 """Checks rules from the 'C++ language rules' section of cppguide.html.
2166
2167 Some of these rules are hard to test (function overloading, using
2168 uint32 inappropriately), but we do the best we can.
2169
2170 Args:
2171 filename: The name of the current file.
2172 clean_lines: A CleansedLines instance containing the file.
2173 linenum: The number of the line to check.
2174 file_extension: The extension (without the dot) of the filename.
2175 include_state: An _IncludeState instance in which the headers are inserted.
2176 error: The function to call with any errors found.
2177 """
erg@google.com4e00b9a2009-01-12 23:05:11 +00002178 # If the line is empty or consists of entirely a comment, no need to
2179 # check it.
2180 line = clean_lines.elided[linenum]
2181 if not line:
2182 return
2183
erg@google.come35f7652009-06-19 20:52:09 +00002184 match = _RE_PATTERN_INCLUDE.search(line)
2185 if match:
2186 CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
2187 return
2188
erg@google.com4e00b9a2009-01-12 23:05:11 +00002189 # Create an extended_line, which is the concatenation of the current and
2190 # next lines, for more effective checking of code that may span more than one
2191 # line.
2192 if linenum + 1 < clean_lines.NumLines():
2193 extended_line = line + clean_lines.elided[linenum + 1]
2194 else:
2195 extended_line = line
2196
2197 # Make Windows paths like Unix.
2198 fullname = os.path.abspath(filename).replace('\\', '/')
2199
2200 # TODO(unknown): figure out if they're using default arguments in fn proto.
2201
erg@google.com4e00b9a2009-01-12 23:05:11 +00002202 # Check for non-const references in functions. This is tricky because &
2203 # is also used to take the address of something. We allow <> for templates,
2204 # (ignoring whatever is between the braces) and : for classes.
2205 # These are complicated re's. They try to capture the following:
2206 # paren (for fn-prototype start), typename, &, varname. For the const
2207 # version, we're willing for const to be before typename or after
2208 # Don't check the implementation on same line.
2209 fnline = line.split('{', 1)[0]
2210 if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
2211 len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
2212 r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
2213 len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
2214 fnline))):
2215
2216 # We allow non-const references in a few standard places, like functions
2217 # called "swap()" or iostream operators like "<<" or ">>".
2218 if not Search(
2219 r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
2220 fnline):
2221 error(filename, linenum, 'runtime/references', 2,
2222 'Is this a non-const reference? '
2223 'If so, make const or use a pointer.')
2224
2225 # Check to see if they're using a conversion function cast.
2226 # I just try to capture the most common basic types, though there are more.
2227 # Parameterless conversion functions, such as bool(), are allowed as they are
2228 # probably a member operator declaration or default constructor.
2229 match = Search(
2230 r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
2231 if match:
2232 # gMock methods are defined using some variant of MOCK_METHODx(name, type)
2233 # where type may be float(), int(string), etc. Without context they are
2234 # virtually indistinguishable from int(x) casts.
2235 if not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
2236 error(filename, linenum, 'readability/casting', 4,
2237 'Using deprecated casting style. '
2238 'Use static_cast<%s>(...) instead' %
2239 match.group(1))
2240
2241 CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
2242 'static_cast',
2243 r'\((int|float|double|bool|char|u?int(16|32|64))\)',
2244 error)
2245 # This doesn't catch all cases. Consider (const char * const)"hello".
2246 CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
2247 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
2248
2249 # In addition, we look for people taking the address of a cast. This
2250 # is dangerous -- casts can assign to temporaries, so the pointer doesn't
2251 # point where you think.
2252 if Search(
2253 r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
2254 error(filename, linenum, 'runtime/casting', 4,
2255 ('Are you taking an address of a cast? '
2256 'This is dangerous: could be a temp var. '
2257 'Take the address before doing the cast, rather than after'))
2258
2259 # Check for people declaring static/global STL strings at the top level.
2260 # This is dangerous because the C++ language does not guarantee that
2261 # globals with constructors are initialized before the first access.
2262 match = Match(
2263 r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
2264 line)
2265 # Make sure it's not a function.
2266 # Function template specialization looks like: "string foo<Type>(...".
2267 # Class template definitions look like: "string Foo<Type>::Method(...".
2268 if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
2269 match.group(3)):
2270 error(filename, linenum, 'runtime/string', 4,
2271 'For a static/global string constant, use a C style string instead: '
2272 '"%schar %s[]".' %
2273 (match.group(1), match.group(2)))
2274
2275 # Check that we're not using RTTI outside of testing code.
2276 if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
2277 error(filename, linenum, 'runtime/rtti', 5,
2278 'Do not use dynamic_cast<>. If you need to cast within a class '
2279 "hierarchy, use static_cast<> to upcast. Google doesn't support "
2280 'RTTI.')
2281
2282 if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
2283 error(filename, linenum, 'runtime/init', 4,
2284 'You seem to be initializing a member variable with itself.')
2285
2286 if file_extension == 'h':
2287 # TODO(unknown): check that 1-arg constructors are explicit.
2288 # How to tell it's a constructor?
2289 # (handled in CheckForNonStandardConstructs for now)
2290 # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
2291 # (level 1 error)
2292 pass
2293
2294 # Check if people are using the verboten C basic types. The only exception
2295 # we regularly allow is "unsigned short port" for port.
2296 if Search(r'\bshort port\b', line):
2297 if not Search(r'\bunsigned short port\b', line):
2298 error(filename, linenum, 'runtime/int', 4,
2299 'Use "unsigned short" for ports, not "short"')
2300 else:
2301 match = Search(r'\b(short|long(?! +double)|long long)\b', line)
2302 if match:
2303 error(filename, linenum, 'runtime/int', 4,
2304 'Use int16/int64/etc, rather than the C type %s' % match.group(1))
2305
2306 # When snprintf is used, the second argument shouldn't be a literal.
2307 match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
2308 if match:
2309 error(filename, linenum, 'runtime/printf', 3,
2310 'If you can, use sizeof(%s) instead of %s as the 2nd arg '
2311 'to snprintf.' % (match.group(1), match.group(2)))
2312
2313 # Check if some verboten C functions are being used.
2314 if Search(r'\bsprintf\b', line):
2315 error(filename, linenum, 'runtime/printf', 5,
2316 'Never use sprintf. Use snprintf instead.')
2317 match = Search(r'\b(strcpy|strcat)\b', line)
2318 if match:
2319 error(filename, linenum, 'runtime/printf', 4,
2320 'Almost always, snprintf is better than %s' % match.group(1))
2321
2322 if Search(r'\bsscanf\b', line):
2323 error(filename, linenum, 'runtime/printf', 1,
2324 'sscanf can be ok, but is slow and can overflow buffers.')
2325
2326 # Check for suspicious usage of "if" like
2327 # } if (a == b) {
2328 if Search(r'\}\s*if\s*\(', line):
2329 error(filename, linenum, 'readability/braces', 4,
2330 'Did you mean "else if"? If not, start a new line for "if".')
2331
2332 # Check for potential format string bugs like printf(foo).
2333 # We constrain the pattern not to pick things like DocidForPrintf(foo).
2334 # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
2335 match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
2336 if match:
2337 error(filename, linenum, 'runtime/printf', 4,
2338 'Potential format string bug. Do %s("%%s", %s) instead.'
2339 % (match.group(1), match.group(2)))
2340
2341 # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
2342 match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
2343 if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
2344 error(filename, linenum, 'runtime/memset', 4,
2345 'Did you mean "memset(%s, 0, %s)"?'
2346 % (match.group(1), match.group(2)))
2347
2348 if Search(r'\busing namespace\b', line):
2349 error(filename, linenum, 'build/namespaces', 5,
2350 'Do not use namespace using-directives. '
2351 'Use using-declarations instead.')
2352
2353 # Detect variable-length arrays.
2354 match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
2355 if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
2356 match.group(3).find(']') == -1):
2357 # Split the size using space and arithmetic operators as delimiters.
2358 # If any of the resulting tokens are not compile time constants then
2359 # report the error.
2360 tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
2361 is_const = True
2362 skip_next = False
2363 for tok in tokens:
2364 if skip_next:
2365 skip_next = False
2366 continue
2367
2368 if Search(r'sizeof\(.+\)', tok): continue
2369 if Search(r'arraysize\(\w+\)', tok): continue
2370
2371 tok = tok.lstrip('(')
2372 tok = tok.rstrip(')')
2373 if not tok: continue
2374 if Match(r'\d+', tok): continue
2375 if Match(r'0[xX][0-9a-fA-F]+', tok): continue
2376 if Match(r'k[A-Z0-9]\w*', tok): continue
2377 if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
2378 if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
2379 # A catch all for tricky sizeof cases, including 'sizeof expression',
2380 # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
2381 # requires skipping the next token becasue we split on ' ' and '*'.
2382 if tok.startswith('sizeof'):
2383 skip_next = True
2384 continue
2385 is_const = False
2386 break
2387 if not is_const:
2388 error(filename, linenum, 'runtime/arrays', 1,
2389 'Do not use variable-length arrays. Use an appropriately named '
2390 "('k' followed by CamelCase) compile-time constant for the size.")
2391
2392 # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
2393 # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
2394 # in the class declaration.
2395 match = Match(
2396 (r'\s*'
2397 r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
2398 r'\(.*\);$'),
2399 line)
2400 if match and linenum + 1 < clean_lines.NumLines():
2401 next_line = clean_lines.elided[linenum + 1]
2402 if not Search(r'^\s*};', next_line):
2403 error(filename, linenum, 'readability/constructors', 3,
2404 match.group(1) + ' should be the last thing in the class')
2405
2406 # Check for use of unnamed namespaces in header files. Registration
2407 # macros are typically OK, so we allow use of "namespace {" on lines
2408 # that end with backslashes.
2409 if (file_extension == 'h'
2410 and Search(r'\bnamespace\s*{', line)
2411 and line[-1] != '\\'):
2412 error(filename, linenum, 'build/namespaces', 4,
2413 'Do not use unnamed namespaces in header files. See '
2414 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
2415 ' for more information.')
2416
2417
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for one kind of C-style cast on a line and reports it.

  Because the text looks so similar, the same pattern also catches
  sizeof(type) and unnamed function parameters; those are reported under
  their own categories instead of as casts.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group is reported as the type being cast to.
    error: The function to call with any errors found.
  """
  cast = Search(pattern, line)
  if not cast:
    return

  # e.g., sizeof(int): look at the text that ends one character before
  # group 1 (presumably just before the opening parenthesis).
  text_before_cast = line[0:cast.start(1) - 1]
  if Match(r'.*sizeof\s*$', text_before_cast):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  # What follows the match decides whether this is really a function
  # declaration with an unnamed parameter rather than a cast:
  #   ')'  function pointer passed as an argument, e.g.
  #        void foo(void (*bar)(int));
  #   ';'  plain declaration or function pointer typedef, e.g.
  #        void foo(int);  or  void foo(int) const;
  #   '='  function pointer assignment, e.g.  void *(*foo)(int) = ...
  #
  # Only the single-unnamed-argument case is caught right now; it should
  # probably be extended to several arguments with some of them unnamed.
  trailer = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))',
                  line[cast.end(0):])
  if trailer:
    terminator = trailer.group(3)
    if not terminator or terminator == ';' or raw_line.find('/*') < 0:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return

  # Anything that reaches this point should be an actual cast.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast.group(1)))
2471
2472
# Pairs of (header, templates declared in that header).  Used to build the
# patterns that decide which #include a use of 'name<' calls for.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Template entity -> headers whose inclusion is accepted as also providing
# that entity, even though they are not the header we would recommend.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# 'string' is matched as a whole word.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# Triples of (compiled pattern, function name, '<algorithm>').
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append((
      re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
      _template,
      '<algorithm>'))

# Triples of (compiled pattern, 'name<>', header), one for every template
# listed in _HEADERS_CONTAINING_TEMPLATES, in table order.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append((
        re.compile(r'(\<|\b)' + _template + r'\s*\<'),
        _template + '<>',
        _header))
2534
2535
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.  Source files ending in '.cpp' are
  treated exactly like '.cc' files.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header.  This is used by the caller of this function to more robustly open
  the header file.  We don't have access to the real include paths in this
  context, so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation.  Because of this, this function gives
  some false positives.  This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc (or .cpp) file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  # Reduce the source path to its module stem: drop the extension, then one
  # test suffix, then the public/internal directory component.
  source_stem = None
  for extension in ('.cc', '.cpp'):
    if filename_cc.endswith(extension):
      source_stem = filename_cc[:-len(extension)]
      break
  if source_stem is None:
    return (False, '')
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break
  source_stem = source_stem.replace('/public/', '/')
  source_stem = source_stem.replace('/internal/', '/')

  # Same reduction for the header: drop '.h', then an optional '-inl'.
  if not filename_h.endswith('.h'):
    return (False, '')
  header_stem = filename_h[:-len('.h')]
  if header_stem.endswith('-inl'):
    header_stem = header_stem[:-len('-inl')]
  header_stem = header_stem.replace('/public/', '/')
  header_stem = header_stem.replace('/internal/', '/')

  if not source_stem.endswith(header_stem):
    return (False, '')
  # Slice with an explicit end index: a negative-length slice would turn
  # into [:0] (the empty string) when header_stem is empty.
  return (True, source_stem[:len(source_stem) - len(header_stem)])
2589
2590
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the header is stripped of comments and matched against the
  include pattern; each include found is recorded in include_state unless
  that key is already present.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for index, line in enumerate(headerfile):
      match = _RE_PATTERN_INCLUDE.search(CleanseComments(line))
      if match:
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(match.group(2),
                                 '%s:%d' % (filename, index + 1))
  finally:
    # Release the file handle.  An injected io factory may hand back a plain
    # iterable of lines, so only call close() when there is one.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
2618
2619
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  Warns when an STL container or function is used without the header that
  provides it being included.  Only one reason is given per header: if both
  equal_to<> and less<> are used, only the one appearing later in the file
  is reported as the reason to include <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Header name -> (line number, template entity), for example
  # { '<functional>': (1219, 'less<>') }.  Later lines overwrite earlier ones.
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    if _RE_PATTERN_STRING.search(line):
      required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Pure speed-up: the template patterns below all need a '<', so lines
    # without one are skipped without changing the result.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc.  Work on a copy of include_state so that the
  # headers merged in below stay local to this function.
  include_state = include_state.copy()

  # Use the absolute path so that matching works properly.
  abs_filename = os.path.abspath(filename)

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'.  In that case,
  # restore the original file name here so that the corresponding header file
  # can be found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  emacs_flymake_suffix = '_flymake.cc'
  if abs_filename.endswith(emacs_flymake_suffix):
    abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cc'

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # include_state is modified during iteration, so we iterate over a copy of
  # the keys.
  for header in list(include_state.keys()):  #NOLINT
    same_module, common_path = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look.  In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped, (linenum, template) in required.items():
    if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
      accepted = [candidate
                  for candidate in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
                  if candidate in include_state]
      if accepted:
        continue
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
2711
2712
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs the per-line checks on a single line of the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The object holding the lines of the file; its raw_lines
                 attribute is indexed here to look for NOLINT.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
  """
  unprocessed_lines = clean_lines.raw_lines
  # The function-length check runs before the NOLINT test, so it also sees
  # lines that are otherwise exempt.
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  if Search(r'\bNOLINT\b', unprocessed_lines[line]):
    return  # NOLINT lines skip every remaining check.

  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002744
2745
def ProcessFileData(filename, file_extension, lines, error):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
           The caller's list is not modified; a padded copy is used.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
  """
  # Pad both ends with marker comments so that list indices coincide with
  # line numbers and the end of the file is recognizable.
  first_marker = '// marker so line numbers and indices both start at 1'
  last_marker = '// marker so line numbers end in a known way'
  lines = [first_marker] + lines + [last_marker]

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
2782
2783
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and lints it if its extension is cc, h or cpp.  Progress and
  problems opening the file are written to sys.stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        # Close the handle explicitly instead of relying on garbage
        # collection.
        source_file.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Remove trailing '\r'.
  carriage_return_found = False
  for index, text in enumerate(lines):
    if text.endswith('\r'):
      lines[index] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
2845
2846
def PrintUsage(message):
  """Writes the usage text to stderr and exits the program.

  Args:
    message: The optional error message.  When given, the program exits
      with it as a fatal error; otherwise it exits with status 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
2858
2859
def PrintCategories():
  """Writes the list of error categories to stderr and exits with status 0.

  These are the categories used to filter messages via --filter.
  """
  sys.stderr.write(_ERROR_CATEGORIES)
  sys.exit(0)
2867
2868
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format and verbosity level as side-effects.
  Invalid options or values, a missing file list, and --help all end the
  program through PrintUsage; an empty --filter value ends it through
  PrintCategories.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings so that omitted options leave them
  # unchanged.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # A non-numeric level is a usage error, not a crash with a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The verbosity level must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)

  return filenames
2912
2913
def main():
  """Lints every file named on the command line and exits.

  The exit status is derived from whether any errors were counted.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCount()
  for filename in filenames:
    ProcessFile(filename, _cpplint_state.verbose_level)

  total_errors = _cpplint_state.error_count
  sys.stderr.write('Total errors found: %d\n' % total_errors)
  sys.exit(total_errors > 0)
2929
2930
# Run the linter only when executed as a script, not when imported.
if __name__ == '__main__':
  main()