blob: 182f246d8e8cbd8a58a93cf5b87e7dd4bcfb1d95 [file] [log] [blame]
erg@google.com4e00b9a2009-01-12 23:05:11 +00001#!/usr/bin/python2.4
2#
3# cpplint.py is Copyright (C) 2009 Google Inc.
4#
5# It is free software; you can redistribute it and/or modify it under the
6# terms of either:
7#
8# a) the GNU General Public License as published by the Free Software
9# Foundation; either version 1, or (at your option) any later version, or
10#
11# b) the "Artistic License".
12
13# Here are some issues that I've had people identify in my code during reviews,
14# that I think are possible to flag automatically in a lint tool. If these were
15# caught by lint, it would save time both for myself and that of my reviewers.
16# Most likely, some of these are beyond the scope of the current lint framework,
17# but I think it is valuable to retain these wish-list items even if they cannot
18# be immediately implemented.
19#
20# Suggestions
21# -----------
22# - Check for no 'explicit' for multi-arg ctor
23# - Check for boolean assign RHS in parens
24# - Check for ctor initializer-list colon position and spacing
25# - Check that if there's a ctor, there should be a dtor
26# - Check accessors that return non-pointer member variables are
27# declared const
28# - Check accessors that return non-const pointer member vars are
29# *not* declared const
30# - Check for using public includes for testing
31# - Check for spaces between brackets in one-line inline method
32# - Check for no assert()
33# - Check for spaces surrounding operators
34# - Check for 0 in pointer context (should be NULL)
35# - Check for 0 in char context (should be '\0')
36# - Check for camel-case method name conventions for methods
37# that are not simple inline getters and setters
38# - Check that base classes have virtual destructors
39# put " // namespace" after } that closes a namespace, with
40# namespace's name after 'namespace' if it is named.
41# - Do not indent namespace contents
42# - Avoid inlining non-trivial constructors in header files
43# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
44# - Check for old-school (void) cast for call-sites of functions
45# ignored return value
46# - Check gUnit usage of anonymous namespace
47# - Check for class declaration order (typedefs, consts, enums,
48# ctor(s?), dtor, friend declarations, methods, member vars)
49#
50
51"""Does google-lint on c++ files.
52
53The goal of this script is to identify places in the code that *may*
54be in non-compliance with google style. It does not attempt to fix
55up these problems -- the point is to educate. It does also not
56attempt to find all problems, or to ensure that everything it does
57find is legitimately a problem.
58
59In particular, we can get very confused by /* and // inside strings!
60We do a small hack, which is to ignore //'s with "'s after them on the
61same line, but it is far from perfect (in either direction).
62"""
63
64import codecs
65import getopt
66import math # for log
67import os
68import re
69import sre_compile
70import string
71import sys
72import unicodedata
73
74
# Help text describing the command-line syntax and flags.
# The filter section states that every filter needs a leading '+' or '-',
# because _CppLintState.SetFilters() raises ValueError for anything else.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To prevent specific lines from being linted, add a '// NOLINT' comment to the
  end of the line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing. Visual Studio
      compatible output (vs7) may also be used. Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".) Filters are evaluated left to right.
      Every filter must start with "+" or "-".
      "-FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=-whitespace,-runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=
"""
116
# We categorize each error message we print. Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here! cpplint_unittest.py should tell you if you forget to do this.
# The literal holds one category name per line; the backslash right after the
# opening quotes keeps the string from starting with an empty line.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = '''\
 build/class
 build/deprecated
 build/endif_comment
 build/forward_decl
 build/header_guard
 build/include
 build/include_order
 build/include_what_you_use
 build/namespaces
 build/printf_format
 build/storage_class
 legal/copyright
 readability/braces
 readability/casting
 readability/check
 readability/constructors
 readability/fn_size
 readability/function
 readability/multiline_comment
 readability/multiline_string
 readability/streams
 readability/todo
 readability/utf8
 runtime/arrays
 runtime/casting
 runtime/explicit
 runtime/int
 runtime/init
 runtime/invalid_increment
 runtime/memset
 runtime/printf
 runtime/printf_format
 runtime/references
 runtime/rtti
 runtime/sizeof
 runtime/string
 runtime/threadsafe_fn
 runtime/virtual
 whitespace/blank_line
 whitespace/braces
 whitespace/comma
 whitespace/comments
 whitespace/end_of_line
 whitespace/ending_newline
 whitespace/indent
 whitespace/labels
 whitespace/line_length
 whitespace/newline
 whitespace/operators
 whitespace/parens
 whitespace/semicolon
 whitespace/tab
 whitespace/todo
'''
erg@google.com4e00b9a2009-01-12 23:05:11 +0000177
# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
181
# Headers that we consider STL headers.
# Written as one whitespace-separated listing; split() turns it into the
# individual header names that make up the set.
_STL_HEADERS = frozenset("""
    algobase.h algorithm alloc.h bitset deque exception
    function.h functional hash_map hash_map.h hash_set
    hash_set.h iterator list list.h map memory pair.h
    pthread_alloc queue set set.h sstream stack
    stl_alloc.h stl_relops.h type_traits.h
    utility vector vector.h
    """.split())
191
192
# Non-STL C++ system headers.
# Written as one whitespace-separated listing; split() turns it into the
# individual header names that make up the set.
_CPP_HEADERS = frozenset("""
    algo.h builtinbuf.h bvector.h cassert cctype
    cerrno cfloat ciso646 climits clocale cmath
    complex complex.h csetjmp csignal cstdarg cstddef
    cstdio cstdlib cstring ctime cwchar cwctype
    defalloc.h deque.h editbuf.h exception fstream
    fstream.h hashtable.h heap.h indstream.h iomanip
    iomanip.h ios iosfwd iostream iostream.h istream.h
    iterator.h limits map.h multimap.h multiset.h
    numeric ostream.h parsestream.h pfstream.h PlotFile.h
    procbuf.h pthread_alloc.h rope rope.h ropeimpl.h
    SFile.h slist slist.h stack.h stdexcept
    stdiostream.h streambuf.h stream.h strfile.h string
    strstream strstream.h tempbuf.h tree.h typeinfo valarray
    """.split())
209
210
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps each macro name to a dict from comparison operator to the name of the
# more specific macro that should be suggested instead.
_CHECK_REPLACEMENT = dict((_macro, {}) for _macro in _CHECK_MACROS)

# Positive forms: the operator maps straight onto the comparison suffix.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    for _family in ('EXPECT', 'ASSERT'):
        _CHECK_REPLACEMENT[_family + '_TRUE'][op] = (
            '%s_%s' % (_family, replacement))
        _CHECK_REPLACEMENT[_family + '_TRUE_M'][op] = (
            '%s_%s_M' % (_family, replacement))

# Negated forms: *_FALSE(a op b) is suggested as the inverse comparison.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
    for _family in ('EXPECT', 'ASSERT'):
        _CHECK_REPLACEMENT[_family + '_FALSE'][op] = (
            '%s_%s' % (_family, inv_replacement))
        _CHECK_REPLACEMENT[_family + '_FALSE_M'][op] = (
            '%s_%s_M' % (_family, inv_replacement))
241
242
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder(). They are numbered 1 through 5 in the
# order listed.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
250
251
# Maps a pattern string to its compiled regular expression object.
_regexp_compile_cache = {}


def Match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      s: The string to match against, anchored at its start.

    Returns:
      The match object, or None if the pattern does not match.
    """
    # The regexp compilation caching is inlined in both Match and Search for
    # performance reasons; factoring it out into a separate function turns out
    # to be noticeably expensive.
    # re.compile is the public entry point; the internal sre_compile module
    # is deprecated in current Python releases.
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)


def Search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      s: The string to scan for the first occurrence of the pattern.

    Returns:
      The match object, or None if the pattern occurs nowhere in s.
    """
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
270
271
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which includes appear.

    As a dict, an _IncludeState object serves as a mapping between include
    filename and line number on which that file was included.

    Call CheckNextIncludeOrder() once for each header in the file, passing
    in the type constants defined above. A call made in an illegal order is
    reported through the non-empty error message that method returns.
    """
    # self._section only ever moves forward through these values. A header
    # that would require moving backwards is reported by
    # CheckNextIncludeOrder.
    _INITIAL_SECTION = 0
    _MY_H_SECTION = 1
    _C_SECTION = 2
    _CPP_SECTION = 3
    _OTHER_H_SECTION = 4

    # Human-readable names used when building the error message.
    _TYPE_NAMES = {
        _C_SYS_HEADER: 'C system header',
        _CPP_SYS_HEADER: 'C++ system header',
        _LIKELY_MY_HEADER: 'header this file implements',
        _POSSIBLE_MY_HEADER: 'header this file may implement',
        _OTHER_HEADER: 'other header',
        }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing. (This can't be an error.)",
        _MY_H_SECTION: 'a header this file implements',
        _C_SECTION: 'C system header',
        _CPP_SECTION: 'C++ system header',
        _OTHER_H_SECTION: 'other header',
        }

    def __init__(self):
        dict.__init__(self)
        self._section = self._INITIAL_SECTION

    def CheckNextIncludeOrder(self, header_type):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        # Built up front from the section we are in *before* any update.
        error_message = 'Found %s after %s' % (
            self._TYPE_NAMES[header_type],
            self._SECTION_NAMES[self._section])

        if header_type == _C_SYS_HEADER:
            if self._section > self._C_SECTION:
                return error_message
            self._section = self._C_SECTION
        elif header_type == _CPP_SYS_HEADER:
            if self._section > self._CPP_SECTION:
                return error_message
            self._section = self._CPP_SECTION
        elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
            # A header that may belong to this file never produces an error:
            # past the "my header" section it simply counts as an other header.
            if self._section <= self._MY_H_SECTION:
                self._section = self._MY_H_SECTION
            else:
                self._section = self._OTHER_H_SECTION
        else:
            assert header_type == _OTHER_HEADER
            self._section = self._OTHER_H_SECTION

        return ''
355
356
class _CppLintState(object):
    """Maintains module-wide state."""

    def __init__(self):
        self.verbose_level = 1  # global setting.
        self.error_count = 0    # global count of reported errors
        self.filters = []       # filters to apply when emitting error messages

        # output format:
        # "emacs" - format that emacs can parse (default)
        # "vs7" - format that Microsoft Visual Studio 7 can parse
        self.output_format = 'emacs'

    def SetOutputFormat(self, output_format):
        """Sets the output format for errors."""
        self.output_format = output_format

    def SetVerboseLevel(self, level):
        """Sets the module's verbosity, and returns the previous setting."""
        last_verbose_level = self.verbose_level
        self.verbose_level = level
        return last_verbose_level

    def SetFilters(self, filters):
        """Sets the error-message filters.

        These filters are applied when deciding whether to emit a given
        error message.

        Args:
          filters: A string of comma-separated filters (eg "+whitespace/indent").
                   Each filter should start with + or -; else we die. An empty
                   or None value clears the filter list.

        Raises:
          ValueError: The comma-separated filters did not all start with
                      '+' or '-'.
                      E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
                      The previously installed filters are left untouched.
        """
        if not filters:
            self.filters = []
            return

        # Validate before installing, so that a rejected filter string cannot
        # leave an invalid entry behind in self.filters.
        candidates = filters.split(',')
        for filt in candidates:
            if not (filt.startswith('+') or filt.startswith('-')):
                raise ValueError('Every filter in --filter must start with + or -'
                                 ' (%s does not)' % filt)
        self.filters = candidates

    def ResetErrorCount(self):
        """Sets the module's error statistic back to zero."""
        self.error_count = 0

    def IncrementErrorCount(self):
        """Bumps the module's error statistic."""
        self.error_count += 1
410
411
# The single _CppLintState instance that the module-level helper functions
# read from and write to.
_cpplint_state = _CppLintState()
413
414
def _OutputFormat():
    """Gets the module's output format.

    Returns:
      The output_format attribute of the module-wide _cpplint_state object.
    """
    return _cpplint_state.output_format
418
419
def _SetOutputFormat(output_format):
    """Sets the module's output format.

    Args:
      output_format: The value handed to _cpplint_state.SetOutputFormat().
    """
    _cpplint_state.SetOutputFormat(output_format)
423
424
def _VerboseLevel():
    """Returns the module's verbosity setting.

    Returns:
      The verbose_level attribute of the module-wide _cpplint_state object.
    """
    return _cpplint_state.verbose_level
428
429
def _SetVerboseLevel(level):
    """Sets the module's verbosity, and returns the previous setting.

    Args:
      level: The new verbosity level.

    Returns:
      Whatever _cpplint_state.SetVerboseLevel() returns, i.e. the level that
      was in effect before this call.
    """
    return _cpplint_state.SetVerboseLevel(level)
433
434
def _Filters():
    """Returns the module's list of output filters, as a list.

    The list returned is the filters attribute of _cpplint_state itself, not
    a copy.
    """
    return _cpplint_state.filters
438
439
def _SetFilters(filters):
    """Sets the module's error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.
    """
    _cpplint_state.SetFilters(filters)
451
452
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body."""

    # Line-count thresholds at verbosity 0; Check() doubles them for every
    # verbosity level above that.
    _NORMAL_TRIGGER = 250
    _TEST_TRIGGER = 400

    def __init__(self):
        self.in_a_function = False
        self.lines_in_function = 0
        self.current_function = ''

    def Begin(self, function_name):
        """Start analyzing function body.

        Args:
          function_name: The name of the function being tracked.
        """
        self.current_function = function_name
        self.lines_in_function = 0
        self.in_a_function = True

    def Count(self):
        """Count line in current function body."""
        if not self.in_a_function:
            return
        self.lines_in_function += 1

    def Check(self, error, filename, linenum):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          filename: The name of the current file.
          linenum: The number of the line to check.
        """
        # Functions whose name starts with TEST or Test get the larger budget.
        if Match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2**_VerboseLevel()

        if self.lines_in_function <= trigger:
            return

        # The level grows by one each time the body length doubles relative
        # to the base trigger, and is capped at 5.
        error_level = min(
            int(math.log(self.lines_in_function / base_trigger, 2)), 5)
        error(filename, linenum, 'readability/fn_size', error_level,
              'Small and focused functions are preferred:'
              ' %s has %d non-comment lines'
              ' (error triggered by exceeding %d lines).' % (
                  self.current_function, self.lines_in_function, trigger))

    def End(self):
        """Stop analyzing function body."""
        self.in_a_function = False
507
508
class _IncludeError(Exception):
    """Indicates a problem with the include order in a file."""
512
513
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def FullName(self):
        """Make Windows paths like Unix.

        Returns:
          The absolute path of the file with backslashes turned into slashes.
        """
        return os.path.abspath(self._filename).replace('\\', '/')

    def RepositoryName(self):
        r"""FullName after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something
        smart: detecting the root of the checkout and truncating
        /path/to/checkout from the name so that we get header guards that
        don't include things like "C:\Documents and Settings\..." or
        "/home/username/..." in them and thus people on different computers
        who have checked the source out to different locations won't see
        bogus errors.

        Returns:
          The path relative to the detected SVN or git checkout root, or the
          full name when the file does not exist or no checkout is found.
        """
        fullname = self.FullName()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we look up
                # the directory tree for the top of the SVN checkout.
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                # Stop once the filesystem root has been reached (a directory
                # that is its own parent), so the climb always terminates.
                while (one_up_dir != root_dir and
                       os.path.exists(os.path.join(one_up_dir, ".svn"))):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by searching up
            # from the current path.
            root_dir = project_dir
            while (root_dir != os.path.dirname(root_dir) and
                   not os.path.exists(os.path.join(root_dir, ".git"))):
                root_dir = os.path.dirname(root_dir)

            # Checked after the search so that a .git directory sitting next
            # to the file itself is recognised as well.
            if os.path.exists(os.path.join(root_dir, ".git")):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def Split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cc', Split() would
        return ('chrome/browser', 'browser', '.cc')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.RepositoryName()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def BaseName(self):
        """File base name - text after the final slash, before the final period."""
        return self.Split()[1]

    def Extension(self):
        """File extension - the final period and the text following it."""
        return self.Split()[2]

    def NoExtension(self):
        """Repository-relative name without the extension (directory/basename)."""
        return '/'.join(self.Split()[0:2])

    def IsSource(self):
        """File has a source file extension."""
        return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
597
598
def _ShouldPrintError(category, confidence):
    """Returns true iff confidence >= verbose, and category passes filter.

    Args:
      category: The category string of the error being considered.
      confidence: The confidence score of the error being considered.
    """
    # An error message is dropped for one of two reasons: the verbosity level
    # isn't high enough, or the filters filter it out.
    if confidence < _cpplint_state.verbose_level:
        return False

    # Filters are applied left to right; the last one whose prefix matches
    # the category decides the outcome.
    suppressed = False
    for one_filter in _Filters():
        prefix = one_filter[1:]
        if one_filter.startswith('-'):
            if category.startswith(prefix):
                suppressed = True
        elif one_filter.startswith('+'):
            if category.startswith(prefix):
                suppressed = False
        else:
            assert False  # should have been checked for in SetFilter.

    return not suppressed
620
621
def Error(filename, linenum, category, confidence, message):
    """Logs the fact we've found a lint error.

    We log where the error was found, and also our confidence in the error,
    that is, how certain we are this is a legitimate style regression, and
    not a misidentification or a use that's sometimes justified.

    Args:
      filename: The name of the file containing the error.
      linenum: The number of the line containing the error.
      category: A string used to describe the "category" this bug
        falls under: "whitespace", say, or "runtime". Categories
        may have a hierarchy separated by slashes: "whitespace/indent".
      confidence: A number from 1-5 representing a confidence score for
        the error, with 5 meaning that we are certain of the problem,
        and 1 meaning that it could be a legitimate construct.
      message: The error message.
    """
    # Nothing is counted or written when the verbosity level or the filters
    # rule the message out.
    if not _ShouldPrintError(category, confidence):
        return

    _cpplint_state.IncrementErrorCount()
    # Only the way the location is spelled differs between the two formats.
    if _cpplint_state.output_format == 'vs7':
        template = '%s(%s): %s [%s] [%d]\n'
    else:
        template = '%s:%s: %s [%s] [%d]\n'
    sys.stderr.write(
        template % (filename, linenum, message, category, confidence))
650
651
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by one of the simple escape characters, a run of
# digits, or 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that begin and end on one line.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of removing the surrounding spaces so we can handle
# comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
672
673
def IsCppString(line):
    """Tells whether the text following `line` would be inside a string constant.

    This function does not consider single-line nor multi-line comments.

    Args:
      line: A partial line of code, i.e. the characters 0..n of a line.

    Returns:
      True, if next character appended to 'line' is inside a
      string constant.
    """
    # Neutralise escaped backslashes first; after this, \\" does not match \".
    normalized = line.replace(r'\\', 'XX')

    all_quotes = normalized.count('"')
    escaped_quotes = normalized.count(r'\"')
    char_literal_quotes = normalized.count("'\"'")

    # An odd number of remaining quotes means a string is still open.
    open_quotes = all_quotes - escaped_quotes - char_literal_quotes
    return open_quotes % 2 == 1
689
690
def FindNextMultiLineCommentStart(lines, lineix):
    """Find the beginning marker for a multiline comment.

    Args:
      lines: The list of lines to scan.
      lineix: The index at which scanning starts.

    Returns:
      The index of the first line, at or after lineix, that starts with '/*'
      (ignoring surrounding whitespace) and has no '*/' later on that same
      line; len(lines) if there is no such line.
    """
    for index in range(lineix, len(lines)):
        stripped = lines[index].strip()
        # Only report this marker if the comment goes beyond this line.
        if stripped.startswith('/*') and '*/' not in stripped[2:]:
            return index
    return len(lines)
700
701
def FindNextMultiLineCommentEnd(lines, lineix):
    """We are inside a comment, find the end marker.

    Args:
      lines: The list of lines to scan.
      lineix: The index at which scanning starts.

    Returns:
      The index of the first line, at or after lineix, that ends with '*/'
      (ignoring surrounding whitespace); len(lines) if there is no such line.
    """
    for index in range(lineix, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
709
710
def RemoveMultiLineCommentsFromRange(lines, begin, end):
    """Clears a range of lines for multi-line comments.

    Args:
      lines: The list of lines; it is modified in place.
      begin: The index of the first line to overwrite.
      end: The index one past the last line to overwrite.
    """
    # Having // dummy comments makes the lines non-empty, so we will not get
    # unnecessary blank line warnings later in the code.
    index = begin
    while index < end:
        lines[index] = '// dummy'
        index += 1
717
718
def RemoveMultiLineComments(filename, lines, error):
    """Removes multiline (c-style) comments from lines.

    Args:
      filename: The name of the current file, used when reporting an error.
      lines: The list of lines; comment lines are overwritten in place.
      error: The function to call with any errors found.
    """
    cursor = 0
    while cursor < len(lines):
        first = FindNextMultiLineCommentStart(lines, cursor)
        if first >= len(lines):
            # No further multi-line comment in the file.
            return

        last = FindNextMultiLineCommentEnd(lines, first)
        if last >= len(lines):
            error(filename, first + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return

        RemoveMultiLineCommentsFromRange(lines, first, last + 1)
        cursor = last + 1
733
734
def CleanseComments(line):
    """Removes //-comments and single-line C-style /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    slash_index = line.find('//')
    # Leave the '//' alone when the text before it ends inside a string.
    if slash_index >= 0 and not IsCppString(line[:slash_index]):
        line = line[:slash_index]
    # get rid of /* ... */
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
749
750
class CleansedLines(object):
    """Holds 3 copies of all lines with different preprocessing applied to them.

    1) elided member contains lines without strings and comments,
    2) lines member contains lines without comments, and
    3) raw_lines member contains all the lines without processing.
    All these three members are of <type 'list'>, and of the same length.
    """

    def __init__(self, lines):
        self.elided = []
        self.lines = []
        self.raw_lines = lines
        self.num_lines = len(lines)
        for raw_line in lines:
            self.lines.append(CleanseComments(raw_line))
            # Strings are collapsed before comments are removed, so a '//'
            # inside a string literal is not taken for a comment.
            collapsed = self._CollapseStrings(raw_line)
            self.elided.append(CleanseComments(collapsed))

    def NumLines(self):
        """Returns the number of lines represented."""
        return self.num_lines

    @staticmethod
    def _CollapseStrings(elided):
        """Collapses strings and chars on a line to simple "" or '' blocks.

        We nix strings first so we're not fooled by text like '"http://"'

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        # Lines matched by _RE_PATTERN_INCLUDE are returned untouched.
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided

        # Remove escaped characters first to make quote/single quote
        # collapsing basic. Things that look like escaped characters
        # shouldn't occur outside of strings and chars.
        collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
        collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
        collapsed = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
        return collapsed
794
795
def CloseExpression(clean_lines, linenum, pos):
    """If input points to ( or { or [, finds the position that closes it.

    If lines[linenum][pos] points to a '(' or '{' or '[', finds the
    linenum/pos that correspond to the closing of the expression.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      pos: A position on the line.

    Returns:
      A tuple (line, linenum, pos) pointer *past* the closing brace, or
      (line, len(lines), -1) if we never find a close. Note we ignore
      strings and comments when matching; and the line we return is the
      'cleansed' line at linenum (the last line examined when no close is
      found).
    """
    closers = {'(': ')', '[': ']', '{': '}'}
    num_lines = clean_lines.NumLines()

    line = clean_lines.elided[linenum]
    startchar = line[pos]
    if startchar not in closers:
        return (line, num_lines, -1)
    endchar = closers[startchar]

    # Walk forward from the opening character, tracking the nesting depth of
    # this bracket kind only. Characters before pos are not part of the
    # expression and must not influence the count.
    depth = 0
    start = pos
    while True:
        for index in range(start, len(line)):
            char = line[index]
            if char == startchar:
                depth += 1
            elif char == endchar:
                depth -= 1
                if depth == 0:
                    return (line, linenum, index + 1)

        # Not closed on this line; continue at the start of the next one, or
        # give up once the file is exhausted.
        linenum += 1
        if linenum >= num_lines:
            return (line, num_lines, -1)
        line = clean_lines.elided[linenum]
        start = 0
833
834
def CheckForCopyright(filename, lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      filename: The name of the current file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    # We'll say it should occur by line 10. Don't forget there's a
    # dummy line at the front.
    # range() is used instead of xrange() because it exists on every Python
    # version; at most ten indices are produced.
    for linenum in range(1, min(len(lines), 11)):
        if re.search(r'Copyright', lines[linenum], re.I):
            break
    else:  # means no copyright line was found
        error(filename, 0, 'legal/copyright', 5,
              'No copyright message found. '
              'You should have a line: "Copyright [year] <Copyright Owner>"')
846
847
def GetHeaderGuardCPPVariable(filename):
    """Returns the CPP variable that should be used as a header guard.

    Args:
      filename: The name of a C++ header file.

    Returns:
      The CPP variable that should be used as a header guard in the
      named file: the repository-relative name with every '-', '.', '/' and
      whitespace character turned into '_', upper-cased, plus a trailing '_'.
    """
    repository_name = FileInfo(filename).RepositoryName()
    guard = re.sub(r'[-./\s]', '_', repository_name)
    return guard.upper() + '_'
862
863
def CheckForHeaderGuard(filename, lines, error):
    """Checks that the file contains a header guard.

    Logs an error if no #ifndef header guard is present. For other
    headers, checks that the full pathname is used.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    cppvar = GetHeaderGuardCPPVariable(filename)

    ifndef = None
    ifndef_linenum = 0
    define = None
    endif = None
    endif_linenum = 0
    for linenum, line in enumerate(lines):
        words = line.split()
        if len(words) >= 2:
            directive, argument = words[0], words[1]
            # Only the first #ifndef and the first #define are remembered.
            if not ifndef and directive == '#ifndef':
                ifndef = argument
                ifndef_linenum = linenum
            if not define and directive == '#define':
                define = argument
        # The last line starting with #endif wins; the whole line is kept.
        if line.startswith('#endif'):
            endif = line
            endif_linenum = linenum

    if not ifndef or not define or ifndef != define:
        error(filename, 0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              cppvar)
        return

    # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
    # for backward compatibility; that variant is reported at level 0.
    legacy_cppvar = cppvar + '_'

    if ifndef != cppvar:
        error_level = 5
        if ifndef == legacy_cppvar:
            error_level = 0
        error(filename, ifndef_linenum, 'build/header_guard', error_level,
              '#ifndef header guard has wrong style, please use: %s' % cppvar)

    # NOTE(review): the comparison below is exact, including the whitespace
    # between '#endif' and '//' -- confirm the literal carries the spacing
    # the style guide asks for.
    if endif != ('#endif // %s' % cppvar):
        error_level = 5
        if endif == ('#endif // %s' % legacy_cppvar):
            error_level = 0
        error(filename, endif_linenum, 'build/header_guard', error_level,
              '#endif line should be "#endif // %s"' % cppvar)
921
922
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains the Unicode replacement character.

  The character U+FFFD indicates that either the file contained invalid
  UTF-8 (likely) or Unicode replacement characters (which it shouldn't).
  Note that it's possible for this to throw off line numbering if the
  invalid UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  for index, text in enumerate(lines):
    if replacement_char not in text:
      continue
    error(filename, index, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
940
941
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The lines array was built by appending two newlines to the original
  # file contents and then splitting on \n.  A file that ends in \n
  # therefore yields an array whose last-but-two element exists and is
  # empty.
  line_count = len(lines)
  if line_count >= 3 and not lines[-2]:
    return  # Trailing newline is present.
  error(filename, line_count - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
958
959
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Warns about /* ... */ comments and "..." strings left open on a line.

  /* ... */ comments are legit inside macros, for one line.  Otherwise
  // comments are preferred, so warning about the others is acceptable.
  Likewise strings may legally continue over several lines when each line
  ends with a backslash, but that is ugly and unnecessary.  This lint
  program does not cope well with either construct, so both are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are harmless, but their second character would
  # otherwise be mistaken for the start of an escaped quote (\") below, so
  # they are dropped first.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2 != 0:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
996
997
# Pairs of (thread-unsafe call prefix, suggested replacement call prefix)
# consulted by CheckPosixThreading().
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  An occurrence counts as a call when it starts the line or is preceded by
  a character that is not alphanumeric and not one of '_', '.', '>' (so
  longer identifiers and member calls are ignored).  Each function in
  threading_list is reported at most once per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    # Examine every occurrence, not only the first: an occurrence inside a
    # longer identifier (e.g. "my_rand(") must not hide a genuine call
    # later on the same line.
    while ix >= 0:
      # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break  # One report per function per line is enough.
      ix = line.find(single_thread_function, ix + 1)
1040
1041
# Matches a whole statement of the form "*count++;" or "*count--;", which
# moves the pointer instead of changing the value it points to.
_RE_PATTERN_IVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_IVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1068
1069
class _ClassInfo(object):
  """Bookkeeping record for one class declaration being scanned."""

  def __init__(self, name, linenum):
    # Identity of the class, exactly as supplied by the caller.
    self.name, self.linenum = name, linenum
    # Progress through the declaration.
    self.seen_open_brace = False
    self.brace_depth = 0
    # Facts gathered about the class so far.
    self.is_derived = False
    self.has_virtual_destructor = False
    self.virtual_method_linenumber = None
1081
1082
class _ClassState(object):
  """Tracks which class declarations the parser believes it is inside.

  The state is a stack of _ClassInfo objects reflecting the parser's guess
  at the current nesting of class declarations, innermost class last.
  Typically, the stack will either be empty or have exactly one entry.
  """

  def __init__(self):
    self.classinfo_stack = []

  def CheckFinished(self, filename, error):
    """Reports a class whose declaration was never seen to end.

    Call this when all lines in a file have been processed.  Only the
    outermost class still on the stack is reported.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    if not self.classinfo_stack:
      return
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    outermost = self.classinfo_stack[0]
    error(filename, outermost.linenum, 'build/class', 5,
          'Failed to find complete declaration of class %s' %
          outermost.name)
1110
1111
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations as it
  is very convenient to do so while checking for gcc-2 compliance.

  The class-related checks are stateful: class_state.classinfo_stack is
  pushed when a class/struct declaration is seen and popped when its braces
  balance again (or when the declaration turns out to be a one-liner), so
  this function is expected to be called for the lines of a file in order.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported.  It is called here with
           5 arguments: filename, line number, category, error level, and
           message.
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  # A type or cv-qualifier keyword followed by a storage-class keyword.
  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  classinfo_stack = class_state.classinfo_stack
  # Look for a class declaration.  Group 3 is the (possibly ::-qualified)
  # class name.
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if class_decl_match:
    classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # If the opening brace hasn't been seen look for it and also
  # parent class declarations.
  if not classinfo.seen_open_brace:
    # If the line has a ';' in it, assume it's a forward declaration or
    # a single-line class declaration, which we won't process.
    if line.find(';') != -1:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = (line.find('{') != -1)
    # Look for a bare ':' (i.e. not part of '::'), taken to introduce a
    # base-class list.
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  # Group 1 is the text between the parentheses; the character class
  # excludes commas and nested parentheses, so only one-argument forms match.
  args = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
               % re.escape(base_classname),
               line)
  # A lone 'void' parameter and a parameter starting with (const) Class&
  # (copy-constructor style) are not reported.
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  brace_depth = classinfo.brace_depth
  brace_depth = brace_depth + line.count('{') - line.count('}')
  if brace_depth <= 0:
    # Braces are balanced again: the class is finished, so drop it from the
    # stack and evaluate what was learned about it.
    classinfo = classinfo_stack.pop()
    # Try to detect missing virtual destructor declarations.
    # For now, only warn if a non-derived class with virtual methods lacks
    # a virtual destructor. This is to make it less likely that people will
    # declare derived virtual destructors without declaring the base
    # destructor virtual.
    if ((classinfo.virtual_method_linenumber is not None) and
        (not classinfo.has_virtual_destructor) and
        (not classinfo.is_derived)):  # Only warn for base classes
      error(filename, classinfo.linenum, 'runtime/virtual', 4,
            'The class %s probably needs a virtual destructor due to '
            'having virtual method(s), one declared at line %d.'
            % (classinfo.name, classinfo.virtual_method_linenumber))
  else:
    # Still inside the class: remember the depth for the next line.
    classinfo.brace_depth = brace_depth
1262
1263
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Reports stray spaces around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often sit inside if/for/while/switch expressions, which
  # have their own, more liberal conventions.  When the line holds such a
  # construct, only the text between its parentheses is examined, so that
  # the stricter function-call standards apply to the calls inside it.
  # Without a control flow construct the whole line is examined.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  candidate = line
  for control_pattern in control_patterns:
    control_match = Search(control_pattern, line)
    if control_match:
      candidate = control_match.group(1)
      break

  # Except in if/for/while/switch, there should never be space immediately
  # inside parens (eg "f( 3, 4 )"), with an exception for nested parens
  # ( (a+b) + c ).  Likewise there should never be a space before a ( that
  # starts a function's arguments; the ( is assumed to do so when the
  # character before the whitespace is legal in a function name (alnum + _)
  # and no macro is being started.  Pointers and references to arrays and
  # functions are too tricky and are skipped; they are recognized very
  # simply as
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # on the assumption that the contents of [] are short enough never to
  # need wrapping.
  if Search(r'\b(if|for|while|switch|return|delete)\b', candidate):
    return  # Control structures are not checked.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', candidate):
    return  # Pointers/references to functions are not checked.
  if Search(r' \([^)]+\)\[[^\]]+\]', candidate):
    return  # Pointers/references to arrays are not checked.

  if Search(r'\w\s*\(\s(?!\s*\\$)', candidate):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', candidate):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  if (Search(r'\w\s+\(', candidate) and
      not Search(r'#\s*define|typedef', candidate)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain.
  if Search(r'[^)]\s+\)\s*[^{\s]', candidate):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
1322
1323
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or consists of only white
  spaces.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1337
1338
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call classifies the given line as the start of a function, the end
  of a function (a line holding only '}'), or any other non-blank line, and
  calls function_state.Begin(), Check()/End(), or Count() accordingly.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  raw = clean_lines.raw_lines
  raw_line = raw[linenum]  # consulted only for the NOLINT marker below
  joined_line = ''  # the candidate function header, accumulated over lines

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from this line for the first line that shows either a
    # body ('{') or that no body needs measuring (';' or '}').
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        # The name reported is the run of word characters and ':' right
        # before the first such run followed by '(' on the starting line.
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # For TEST macros the parenthesized arguments are appended to the
          # name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    # NOLINT is searched for in the raw line rather than in the
    # comment-stripped text.
    if not Search(r'\bNOLINT\b', raw_line):
      function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1408
1409
# Matches a "//" comment that starts with TODO.  Group 1 is the whitespace
# between "//" and TODO, group 2 the parenthesized username if present, and
# group 3 the single whitespace character (or end of text) following the
# optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Flags badly formatted TODO comments.

  Comments that do not start with a TODO are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, spaces_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Anything other than exactly one space or the end of the comment is
  # reported, including the case where the group did not match at all
  # (None).
  # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
  if spaces_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1441
1442
1443def CheckSpacing(filename, clean_lines, linenum, error):
1444 """Checks for the correctness of various spacing issues in the code.
1445
1446 Things we check for: spaces around operators, spaces after
1447 if/for/while/switch, no spaces around parens in function calls, two
1448 spaces between code and comment, don't start a block with a blank
1449 line, don't end a function with a blank line, don't have too many
1450 blank lines in a row.
1451
1452 Args:
1453 filename: The name of the current file.
1454 clean_lines: A CleansedLines instance containing the file.
1455 linenum: The number of the line to check.
1456 error: The function to call with any errors found.
1457 """
1458
1459 raw = clean_lines.raw_lines
1460 line = raw[linenum]
1461
1462 # Before nixing comments, check if the line is blank for no good
1463 # reason. This includes the first line after a block is opened, and
1464 # blank lines at the end of a function (ie, right before a line like '}'
1465 if IsBlankLine(line):
1466 elided = clean_lines.elided
1467 prev_line = elided[linenum - 1]
1468 prevbrace = prev_line.rfind('{')
1469 # TODO(unknown): Don't complain if line before blank line, and line after,
1470 # both start with alnums and are indented the same amount.
1471 # This ignores whitespace at the start of a namespace block
1472 # because those are not usually indented.
1473 if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
1474 and prev_line[:prevbrace].find('namespace') == -1):
1475 # OK, we have a blank line at the start of a code block. Before we
1476 # complain, we check if it is an exception to the rule: The previous
1477 # non-empty line has the paramters of a function header that are indented
1478 # 4 spaces (because they did not fit in a 80 column line when placed on
1479 # the same line as the function name). We also check for the case where
1480 # the previous line is indented 6 spaces, which may happen when the
1481 # initializers of a constructor do not fit into a 80 column line.
1482 exception = False
1483 if Match(r' {6}\w', prev_line): # Initializer list?
1484 # We are looking for the opening column of initializer list, which
1485 # should be indented 4 spaces to cause 6 space indentation afterwards.
1486 search_position = linenum-2
1487 while (search_position >= 0
1488 and Match(r' {6}\w', elided[search_position])):
1489 search_position -= 1
1490 exception = (search_position >= 0
1491 and elided[search_position][:5] == ' :')
1492 else:
1493 # Search for the function arguments or an initializer list. We use a
1494 # simple heuristic here: If the line is indented 4 spaces; and we have a
1495 # closing paren, without the opening paren, followed by an opening brace
1496 # or colon (for initializer lists) we assume that it is the last line of
1497 # a function header. If we have a colon indented 4 spaces, it is an
1498 # initializer list.
1499 exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
1500 prev_line)
1501 or Match(r' {4}:', prev_line))
1502
1503 if not exception:
1504 error(filename, linenum, 'whitespace/blank_line', 2,
1505 'Blank line at the start of a code block. Is this needed?')
1506 # This doesn't ignore whitespace at the end of a namespace block
1507 # because that is too hard without pairing open/close braces;
1508 # however, a special exception is made for namespace closing
1509 # brackets which have a comment containing "namespace".
1510 #
1511 # Also, ignore blank lines at the end of a block in a long if-else
1512 # chain, like this:
1513 # if (condition1) {
1514 # // Something followed by a blank line
1515 #
1516 # } else if (condition2) {
1517 # // Something else
1518 # }
1519 if linenum + 1 < clean_lines.NumLines():
1520 next_line = raw[linenum + 1]
1521 if (next_line
1522 and Match(r'\s*}', next_line)
1523 and next_line.find('namespace') == -1
1524 and next_line.find('} else ') == -1):
1525 error(filename, linenum, 'whitespace/blank_line', 3,
1526 'Blank line at the end of a code block. Is this needed?')
1527
1528 # Next, we complain if there's a comment too near the text
1529 commentpos = line.find('//')
1530 if commentpos != -1:
1531 # Check if the // may be in quotes. If so, ignore it
erg@google.coma87abb82009-02-24 01:41:01 +00001532 # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com4e00b9a2009-01-12 23:05:11 +00001533 if (line.count('"', 0, commentpos) -
1534 line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
1535 # Allow one space for new scopes, two spaces otherwise:
1536 if (not Match(r'^\s*{ //', line) and
1537 ((commentpos >= 1 and
1538 line[commentpos-1] not in string.whitespace) or
1539 (commentpos >= 2 and
1540 line[commentpos-2] not in string.whitespace))):
1541 error(filename, linenum, 'whitespace/comments', 2,
1542 'At least two spaces is best between code and comments')
1543 # There should always be a space between the // and the comment
1544 commentend = commentpos + 2
1545 if commentend < len(line) and not line[commentend] == ' ':
1546 # but some lines are exceptions -- e.g. if they're big
1547 # comment delimiters like:
1548 # //----------------------------------------------------------
1549 match = Search(r'[=/-]{4,}\s*$', line[commentend:])
1550 if not match:
1551 error(filename, linenum, 'whitespace/comments', 4,
1552 'Should have a space between // and comment')
1553 CheckComment(line[commentpos:], filename, linenum, error)
1554
1555 line = clean_lines.elided[linenum] # get rid of comments and strings
1556
1557 # Don't try to do spacing checks for operator methods
1558 line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
1559
1560 # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
1561 # Otherwise not. Note we only check for non-spaces on *both* sides;
1562 # sometimes people put non-spaces on one side when aligning ='s among
1563 # many lines (not that this is behavior that I approve of...)
1564 if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
1565 error(filename, linenum, 'whitespace/operators', 4,
1566 'Missing spaces around =')
1567
1568 # It's ok not to have spaces around binary operators like + - * /, but if
1569 # there's too little whitespace, we get concerned. It's hard to tell,
1570 # though, so we punt on this one for now. TODO.
1571
1572 # You should always have whitespace around binary operators.
1573 # Alas, we can't test < or > because they're legitimately used sans spaces
1574 # (a->b, vector<int> a). The only time we can tell is a < with no >, and
1575 # only if it's not template params list spilling into the next line.
1576 match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
1577 if not match:
1578 # Note that while it seems that the '<[^<]*' term in the following
1579 # regexp could be simplified to '<.*', which would indeed match
1580 # the same class of strings, the [^<] means that searching for the
1581 # regexp takes linear rather than quadratic time.
1582 if not Search(r'<[^<]*,\s*$', line): # template params spill
1583 match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
1584 if match:
1585 error(filename, linenum, 'whitespace/operators', 3,
1586 'Missing spaces around %s' % match.group(1))
1587 # We allow no-spaces around << and >> when used like this: 10<<20, but
1588 # not otherwise (particularly, not when used as streams)
1589 match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
1590 if match:
1591 error(filename, linenum, 'whitespace/operators', 3,
1592 'Missing spaces around %s' % match.group(1))
1593
1594 # There shouldn't be space around unary operators
1595 match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
1596 if match:
1597 error(filename, linenum, 'whitespace/operators', 4,
1598 'Extra space for operator %s' % match.group(1))
1599
1600 # A pet peeve of mine: no spaces after an if, while, switch, or for
1601 match = Search(r' (if\(|for\(|while\(|switch\()', line)
1602 if match:
1603 error(filename, linenum, 'whitespace/parens', 5,
1604 'Missing space before ( in %s' % match.group(1))
1605
1606 # For if/for/while/switch, the left and right parens should be
1607 # consistent about how many spaces are inside the parens, and
1608 # there should either be zero or one spaces inside the parens.
1609 # We don't want: "if ( foo)" or "if ( foo )".
1610 # Exception: "for ( ; foo; bar)" is allowed.
1611 match = Search(r'\b(if|for|while|switch)\s*'
1612 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
1613 line)
1614 if match:
1615 if len(match.group(2)) != len(match.group(4)):
1616 if not (match.group(3) == ';' and
1617 len(match.group(2)) == 1 + len(match.group(4))):
1618 error(filename, linenum, 'whitespace/parens', 5,
1619 'Mismatching spaces inside () in %s' % match.group(1))
1620 if not len(match.group(2)) in [0, 1]:
1621 error(filename, linenum, 'whitespace/parens', 5,
1622 'Should have zero or one spaces inside ( and ) in %s' %
1623 match.group(1))
1624
1625 # You should always have a space after a comma (either as fn arg or operator)
1626 if Search(r',[^\s]', line):
1627 error(filename, linenum, 'whitespace/comma', 3,
1628 'Missing space after ,')
1629
1630 # Next we will look for issues with function calls.
1631 CheckSpacingForFunctionCall(filename, line, linenum, error)
1632
1633 # Except after an opening paren, you should have spaces before your braces.
1634 # And since you should never have braces at the beginning of a line, this is
1635 # an easy test.
1636 if Search(r'[^ (]{', line):
1637 error(filename, linenum, 'whitespace/braces', 5,
1638 'Missing space before {')
1639
1640 # Make sure '} else {' has spaces.
1641 if Search(r'}else', line):
1642 error(filename, linenum, 'whitespace/braces', 5,
1643 'Missing space before else')
1644
1645 # You shouldn't have spaces before your brackets, except maybe after
1646 # 'delete []' or 'new char * []'.
1647 if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
1648 error(filename, linenum, 'whitespace/braces', 5,
1649 'Extra space before [')
1650
1651 # You shouldn't have a space before a semicolon at the end of the line.
1652 # There's a special case for "for" since the style guide allows space before
1653 # the semicolon there.
1654 if Search(r':\s*;\s*$', line):
1655 error(filename, linenum, 'whitespace/semicolon', 5,
1656 'Semicolon defining empty statement. Use { } instead.')
1657 elif Search(r'^\s*;\s*$', line):
1658 error(filename, linenum, 'whitespace/semicolon', 5,
1659 'Line contains only semicolon. If this should be an empty statement, '
1660 'use { } instead.')
1661 elif (Search(r'\s+;\s*$', line) and
1662 not Search(r'\bfor\b', line)):
1663 error(filename, linenum, 'whitespace/semicolon', 5,
1664 'Extra space before last semicolon. If this should be an empty '
1665 'statement, use { } instead.')
1666
1667
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple describing the nearest preceding line of
    clean_lines.elided for which IsBlankLine() is false.  When no such line
    exists the tuple is ('', -1).
  """
  # Walk upwards from the line just above linenum until something non-blank
  # turns up or the top of the file is passed.
  for candidate in range(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if IsBlankLine(text):
      continue
    return (text, candidate)
  return ('', -1)
1689
1690
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Reports, for the line at linenum:
    - an opening brace alone on a line that does not look like a new scope,
    - an 'else' on the line after a closing brace,
    - an else with a brace on only one side,
    - an else or do clause sharing a line with its body,
    - a ';' following a '}' that does not close a struct/class/enum or an
      initializer.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):  # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:  # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:  # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  # To get more context, keep prepending earlier non-blank lines for as long
  # as the accumulated text still begins with an indented '{...};' and the
  # earlier line holds no ';' of its own.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # prevlinenum < 0 means the top of the file was reached.  prevline is then
    # '' and prepending it would leave 'line' unchanged, so without this guard
    # the same test would succeed forever.
    if (prevlinenum >= 0 and Match(r'\s+{.*}\s*;', line) and
        not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1763
1764
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a comparison-specific macro.

  For instance CHECK(a == b) can become CHECK_EQ(a, b); the same holds for
  the CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE variants.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """

  # A literal operand: a decimal or hex integer (optional sign and suffix),
  # a string, or a char -- tried in that order.
  match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only flag comparisons where one side looks like a literal, since
  # CHECK(x == iterator) won't compile when rewritten.  This misses some
  # legitimate suggestions, but that is less annoying than dealing with
  # extraneous warnings.
  literal_on_left = r'\s*' + match_constant + r'\s*' + operator + r'[^<>].*'
  literal_on_right = (r'.*[^<>]' + operator + r'\s*' + match_constant +
                      r'\s*\)')
  match_this = (r'\s*' + macro + r'\((' +
                literal_on_left + '|' + literal_on_right + ')')

  looks_replaceable = Match(match_this, line)

  # CHECK(x == NULL) is left alone because CHECK_EQ(x, NULL) won't compile
  # (requires a cast).  Compound conditions using && or ||, such as
  # CHECK(a == b || c == d), are left alone as well.
  return looks_replaceable and not Search(r'NULL|&&|\|\|', line)
1798
1799
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific CHECK/EXPECT macros where they apply.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Find the first known macro that appears in the raw line; it selects the
  # family of replacements to suggest.
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in clean_lines.raw_lines[linenum]:
      current_macro = candidate
      break

  # Nothing to do when the line mentions none of the macros.
  if not current_macro:
    return

  line = clean_lines.elided[linenum]  # comments and strings removed

  # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Two-character
  # operators are tried before their one-character prefixes, and only the
  # first hit is reported.
  for operator in ['==', '!=', '>=', '>', '<=', '<']:
    if not ReplaceableCheck(operator, current_macro, line):
      continue
    suggestion = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              suggestion, current_macro, operator))
    break
1831
1832
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For non-Unicode input
    (a byte string) this is simply len(line).
  """
  # Python 2 has a dedicated 'unicode' type.  On interpreters without that
  # builtin every 'str' is already Unicode text, so fall back to 'str' there
  # instead of failing with a NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Compose first so that base+accent pairs with a precomposed form count as
  # a single character.
  for c in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(c) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(c):
      # Remaining combining marks take no column of their own.
      width += 1
  return width
1853
1854
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  Covered here: tabs, trailing whitespace, odd indentation,
  unindented labels, line length and several statements on one line.  The
  brace, spacing and CHECK-macro checks are then run on the same line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  # Number of leading blanks (spaces only, tabs do not count).
  initial_spaces = len(line) - len(line.lstrip(' '))

  # At most one of the following three complaints is raised per line.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    # A one-space indent is tolerated in some situations, notably labels.
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  elif (initial_spaces == 0 and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    # Labels should always be indented at least one space.
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor, '
          'the colon should be on the line after the definition header.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    for directive in ('#ifndef %s', '#define %s', '#endif // %s'):
      if line.startswith(directive % cppvar):
        is_header_guard = True
        break

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  exempt_from_length = (line.startswith('#include') or is_header_guard or
                        Match(r'^\s*//.*http(s?)://\S*$', line))
  if not exempt_from_length:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' hints at several statements on one line.  A line that
  # mentions 'for' is excused, since for loops are allowed two ;'s.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # A for header may run over two lines: the previous line then mentions
    # 'for' and holds no ';' of its own.
    continues_for_header = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
1949
1950
# Matches a quoted #include of a .h file whose path has no directory part,
# e.g. '#include "bar.h"' but not '#include "foo/bar.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')

# Matches any #include line.  Group 1 is the opening delimiter ('<' or '"')
# and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')

# Matches the first component of a filename delimited by -s and _s (or a dot).
# That is:
#   _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#   _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
1959
1960
def _DropCommonSuffixes(filename):
  """Strips well-known trailers such as _test.cc or -inl.h from a filename.

  A trailer is only removed when it is joined to the rest of the name by
  '-' or '_'.  If no trailer applies, just the extension is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
                 'inl.h', 'impl.h', 'internal.h'):
    for separator in ('-', '_'):
      trailer = separator + suffix
      if filename.endswith(trailer):
        return filename[:-len(trailer)]
  root, _ = os.path.splitext(filename)
  return root
1986
1987
def _IsTestFilename(filename):
  """Tells whether a filename ends in one of the recognised test suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  for test_suffix in ('_test.cc', '_unittest.cc', '_regtest.cc'):
    if filename.endswith(test_suffix):
      return True
  return False
2003
2004
def _ClassifyInclude(fileinfo, include, is_system):
  """Works out which category of header 'include' belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers: C++ ones if they appear in
  # either of the known C++ header collections, C ones otherwise.
  if is_system:
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  # A sibling 'public' directory is accepted as well.
  target_dir, target_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_first = _RE_FIRST_COMPONENT.match(target_base)
  include_first = _RE_FIRST_COMPONENT.match(include_base)
  if (target_first and include_first and
      target_first.group(0) == include_first.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2063
2064
erg@google.coma87abb82009-02-24 01:41:01 +00002065
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  The checks run in sequence on a single line: include style, duplicates and
  ordering; stream headers; non-const reference parameters; old-style casts;
  static/global strings; RTTI; self-initialisation; C integer types; risky
  printf/str*/memset usage; using-directives; variable-length arrays;
  placement of the DISALLOW_* macros; unnamed namespaces in headers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  # get rid of comments
  comment_elided_line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(comment_elided_line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(comment_elided_line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))

  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after
  # Don't check the implementation on same line.
  # The line is flagged when reference-looking parameters outnumber the
  # const-qualified ones (const before or after the type).
  fnline = line.split('{', 1)[0]
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.
    if not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style. '
            'Use static_cast<%s>(...) instead' %
            match.group(1))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases.  Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>. If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast. Google doesn't support "
          'RTTI.')

  # A name ending in '_' that is initialized from itself, e.g. foo_(foo_).
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match:
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # NOTE(review): the alternation below is not grouped, so '^' applies only to
  # the first alternative and '$' only to the last one.
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    # '>>' has to be a delimiter in its own right: the size text never
    # contains ']' (checked above), so a '>>]' alternative could never match
    # and a shift such as 'kSize >> 1' would leave '>>' behind as a token.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2366
2367
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports a C-style cast found on the line via the given pattern.

  Besides real casts, the same pattern also hits sizeof(type) expressions and
  unnamed function parameters; those are reported under their own categories
  instead of being reported as casts.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group is reported as the cast's target type.
    error: The function to call with any errors found.
  """
  cast = Search(pattern, line)
  if not cast:
    return

  # Text ahead of the match, minus the single character right before group 1
  # (presumably the opening paren -- depends on the caller's pattern).
  # e.g., sizeof(int)
  leading_text = line[0:cast.start(1) - 1]
  if Match(r'.*sizeof\s*$', leading_text):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  trailing_text = line[cast.end(0):]

  # What follows the match tells declarations apart from casts:
  #   - a close paren: function pointer passed as an argument,
  #     eg, void foo(void (*bar)(int));
  #   - a semicolon, brace or throw(), optionally after const: a plain
  #     function declaration or a function pointer typedef,
  #     eg, void foo(int); or void foo(int) const;
  #   - an equals sign: function pointer assignment,
  #     eg, void *(*foo)(int) = ...
  #
  # Only the single, unnamed argument case is recognized for now; multiple
  # arguments with some of them unnamed are not handled.
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', trailing_text)
  if not declaration:
    # Nothing declaration-like follows, so treat it as an actual cast.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast. Use %s<%s>(...) instead' %
          (cast_type, cast.group(1)))
    return

  terminator = declaration.group(3)
  # A '{' or throw() terminator is let through only when the raw line has a
  # '/*' comment in it.
  if not terminator or terminator == ';' or raw_line.find('/*') < 0:
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
2421
2422
# Pairs of (header, names of the templates that header provides).
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template entity to headers that are accepted as providing it, even
# though they are not the header suggested in the warning.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, template name, header) triples for <algorithm> functions.
# Each pattern matches max<type>(..., ...) and max(..., ...), but not
# foo->max, foo.max or type::max().
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# (compiled pattern, 'name<>', header) triples, one per template listed in
# _HEADERS_CONTAINING_TEMPLATES, in the same order as that table.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
2484
2485
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  We only check headers. Implementation files (.cc and .cpp) are skipped
  entirely: they should be able to depend on their respective header files for
  includes, but there is no simple way of producing this logic here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
  """
  # Both implementation-file extensions accepted by ProcessFile are skipped.
  # (Two endswith calls rather than a tuple argument, to stay compatible with
  # the Python 2.4 this script declares in its shebang.)
  if filename.endswith('.cc') or filename.endswith('.cpp'):
    return

  required = {}  # A map of header name to linenumber and the template entity.
  # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    # Skip empty lines and preprocessor directives.
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    if _RE_PATTERN_STRING.search(line):
      required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # This is just a speed up, no semantics are changed: every template
    # pattern needs a '<', so lines without one cannot match any of them.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    template = required[required_header_unstripped][1]
    if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
      # Any one of the accepted alternative headers satisfies the requirement.
      headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
      if [True for header in headers if header in include_state]:
        continue
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, required[required_header_unstripped][0],
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
2543
2544
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs the per-line checks on one line of the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute is
                 consulted for NOLINT markers.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported.  Checks call it with the
           filename, the line number, a category string, an integer level and
           the message.

  """
  unprocessed_lines = clean_lines.raw_lines
  # Function-length tracking runs before the NOLINT test, so it also sees
  # lines whose other checks are suppressed.
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  if not Search(r'\bNOLINT\b', unprocessed_lines[line]):
    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
    CheckStyle(filename, clean_lines, line, file_extension, error)
    CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                  error)
    CheckForNonStandardConstructs(filename, clean_lines, line,
                                  class_state, error)
    CheckPosixThreading(filename, clean_lines, line, error)
    CheckInvalidIncrement(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002576
2577
def ProcessFileData(filename, file_extension, lines, error):
  """Runs every lint check over the contents of one file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported.  Checks call it with the
           filename, the line number, a category string, an integer level and
           the message.
  """
  # Pad both ends with marker comments.  The caller's list is left untouched
  # because concatenation builds a new one.
  leading_marker = ['// marker so line numbers and indices both start at 1']
  trailing_marker = ['// marker so line numbers end in a known way']
  lines = leading_marker + lines + trailing_marker

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  CheckForCopyright(filename, lines, error)

  is_header = file_extension == 'h'
  if is_header:
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # These two run here, not in ProcessLine, so that they are handed the
  # raw lines rather than the "cleaned" ones.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
2614
2615
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Files that cannot be opened are skipped with a message on stderr.  Files
  whose extension is not cc, h or cpp are ignored, except for stdin ('-').

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # Close the handle explicitly instead of leaving it to the garbage
      # collector.  try/finally is used rather than a with-statement because
      # the shebang declares Python 2.4.
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        source_file.close()

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  carriage_return_found = False
  # Remove trailing '\r'.
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
2677
2678
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message.  When given, it is used as the exit
      argument (prefixed with 'FATAL ERROR: '); otherwise the exit status
      is 1.
  """
  sys.stderr.write(_USAGE)
  exit_argument = 1
  if message:
    exit_argument = '\nFATAL ERROR: ' + message
  sys.exit(exit_argument)
2690
2691
def PrintCategories():
  """Writes the list of error categories to stderr and exits with status 0.

  These are the categories used to filter messages via --filter.
  """
  stream = sys.stderr
  stream.write(_ERROR_CATEGORIES)
  sys.exit(0)
2699
2700
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format and verbosity level as side-effects.
  Invalid options, an unknown output format, a non-integer verbosity or a
  missing file list all end the program through PrintUsage.  An empty
  --filter= value prints the category list and exits.

  Args:
    args: The command line arguments:

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the current settings so that unspecified options keep them.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a malformed level as a usage error instead of letting the
      # ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The verbosity level must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)

  return filenames
2744
2745
def main():
  """Lints every file named on the command line and exits.

  The exit argument is True when at least one error was counted and False
  otherwise.
  """
  targets = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCount()
  for target in targets:
    ProcessFile(target, _cpplint_state.verbose_level)

  total_errors = _cpplint_state.error_count
  sys.stderr.write('Total errors found: %d\n' % total_errors)
  sys.exit(total_errors > 0)
2761
2762
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()