blob: f721ecc563503f1aca0b0bb7a247ff64db0849a9 [file] [log] [blame]
erg@google.com4e00b9a2009-01-12 23:05:11 +00001#!/usr/bin/python2.4
2#
3# cpplint.py is Copyright (C) 2009 Google Inc.
4#
5# It is free software; you can redistribute it and/or modify it under the
6# terms of either:
7#
8# a) the GNU General Public License as published by the Free Software
9# Foundation; either version 1, or (at your option) any later version, or
10#
11# b) the "Artistic License".
12
13# Here are some issues that I've had people identify in my code during reviews,
14# that I think are possible to flag automatically in a lint tool. If these were
15# caught by lint, it would save time both for myself and that of my reviewers.
16# Most likely, some of these are beyond the scope of the current lint framework,
17# but I think it is valuable to retain these wish-list items even if they cannot
18# be immediately implemented.
19#
20# Suggestions
21# -----------
22# - Check for no 'explicit' for multi-arg ctor
23# - Check for boolean assign RHS in parens
24# - Check for ctor initializer-list colon position and spacing
25# - Check that if there's a ctor, there should be a dtor
26# - Check accessors that return non-pointer member variables are
27# declared const
28# - Check accessors that return non-const pointer member vars are
29# *not* declared const
30# - Check for using public includes for testing
31# - Check for spaces between brackets in one-line inline method
32# - Check for no assert()
33# - Check for spaces surrounding operators
34# - Check for 0 in pointer context (should be NULL)
35# - Check for 0 in char context (should be '\0')
36# - Check for camel-case method name conventions for methods
37# that are not simple inline getters and setters
38# - Check that base classes have virtual destructors
39# put " // namespace" after } that closes a namespace, with
40# namespace's name after 'namespace' if it is named.
41# - Do not indent namespace contents
42# - Avoid inlining non-trivial constructors in header files
43# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
44# - Check for old-school (void) cast for call-sites of functions
45# ignored return value
46# - Check gUnit usage of anonymous namespace
47# - Check for class declaration order (typedefs, consts, enums,
48# ctor(s?), dtor, friend declarations, methods, member vars)
49#
50
51"""Does google-lint on c++ files.
52
53The goal of this script is to identify places in the code that *may*
54be in non-compliance with google style. It does not attempt to fix
55up these problems -- the point is to educate. It does also not
56attempt to find all problems, or to ensure that everything it does
57find is legitimately a problem.
58
59In particular, we can get very confused by /* and // inside strings!
60We do a small hack, which is to ignore //'s with "'s after them on the
61same line, but it is far from perfect (in either direction).
62"""
63
64import codecs
65import getopt
66import math # for log
67import os
68import re
69import sre_compile
70import string
71import sys
72import unicodedata
73
74
# Command-line help text: syntax, supported flags and the filter syntax.
# NOTE(review): the filter section below says a bare "FOO" filter is accepted
# (and one example uses bare names), but _CppLintState.SetFilters raises
# ValueError for any filter that does not start with '+' or '-' -- confirm
# which of the two is intended.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
 <file> [file] ...

 The style guidelines this tries to follow are those in
 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

 Every problem is given a confidence score from 1-5, with 5 meaning we are
 certain of the problem, and 1 meaning it could be a legitimate construct.
 This will miss some errors, and is not a substitute for a code review.

 To prevent specific lines from being linted, add a '// NOLINT' comment to the
 end of the line.

 The files passed in will be linted; at least one file must be provided.
 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

 Flags:

 output=vs7
 By default, the output is formatted to ease emacs parsing. Visual Studio
 compatible output (vs7) may also be used. Other formats are unsupported.

 verbose=#
 Specify a number 0-5 to restrict errors to certain verbosity levels.

 filter=-x,+y,...
 Specify a comma-separated list of category-filters to apply: only
 error messages whose category names pass the filters will be printed.
 (Category names are printed with the message and look like
 "[whitespace/indent]".) Filters are evaluated left to right.
 "-FOO" and "FOO" means "do not print categories that start with FOO".
 "+FOO" means "do print categories that start with FOO".

 Examples: --filter=-whitespace,+whitespace/braces
 --filter=whitespace,runtime/printf,+runtime/printf_format
 --filter=-,+build/include_what_you_use

 To see a list of all the categories used in cpplint, pass no arg:
 --filter=
"""
116
# We categorize each error message we print. Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here! cpplint_unittest.py should tell you if you forget to do this.
# The value is a single string with one category per line; the backslash
# after the opening quotes keeps the string from starting with a newline.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = '''\
 build/class
 build/deprecated
 build/endif_comment
 build/forward_decl
 build/header_guard
 build/include
 build/include_order
 build/include_what_you_use
 build/namespaces
 build/printf_format
 build/storage_class
 legal/copyright
 readability/braces
 readability/casting
 readability/check
 readability/constructors
 readability/fn_size
 readability/function
 readability/multiline_comment
 readability/multiline_string
 readability/streams
 readability/todo
 readability/utf8
 runtime/arrays
 runtime/casting
 runtime/explicit
 runtime/int
 runtime/init
 runtime/memset
 runtime/printf
 runtime/printf_format
 runtime/references
 runtime/rtti
 runtime/sizeof
 runtime/string
 runtime/threadsafe_fn
 runtime/virtual
 whitespace/blank_line
 whitespace/braces
 whitespace/comma
 whitespace/comments
 whitespace/end_of_line
 whitespace/ending_newline
 whitespace/indent
 whitespace/labels
 whitespace/line_length
 whitespace/newline
 whitespace/operators
 whitespace/parens
 whitespace/semicolon
 whitespace/tab
 whitespace/todo
'''
erg@google.com4e00b9a2009-01-12 23:05:11 +0000176
# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
180
# Headers that we consider STL headers.
_STL_HEADERS = frozenset("""
    algobase.h algorithm alloc.h bitset deque exception
    function.h functional hash_map hash_map.h hash_set
    hash_set.h iterator list list.h map memory pair.h
    pthread_alloc queue set set.h sstream stack
    stl_alloc.h stl_relops.h type_traits.h
    utility vector vector.h
    """.split())


# Non-STL C++ system headers.
_CPP_HEADERS = frozenset("""
    algo.h builtinbuf.h bvector.h cassert cctype
    cerrno cfloat ciso646 climits clocale cmath
    complex complex.h csetjmp csignal cstdarg cstddef
    cstdio cstdlib cstring ctime cwchar cwctype
    defalloc.h deque.h editbuf.h exception fstream
    fstream.h hashtable.h heap.h indstream.h iomanip
    iomanip.h ios iosfwd iostream iostream.h istream.h
    iterator.h limits map.h multimap.h multiset.h
    numeric ostream.h parsestream.h pfstream.h PlotFile.h
    procbuf.h pthread_alloc.h rope rope.h ropeimpl.h
    SFile.h slist slist.h stack.h stdexcept
    stdiostream.h streambuf.h stream.h strfile.h string
    strstream strstream.h tempbuf.h tree.h typeinfo valarray
    """.split())
208
209
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/EXPECT_TRUE/EXPECT_FALSE, keyed first by the
# macro name and then by the comparison operator.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# The "true" flavours map an operator to the macro of the same meaning.
for op, replacement in zip(('==', '!=', '>=', '>', '<=', '<'),
                           ('EQ', 'NE', 'GE', 'GT', 'LE', 'LT')):
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_' + replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_' + replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_' + replacement + '_M'

# The "false" flavours map an operator to the macro of the opposite meaning.
for op, inv_replacement in zip(('==', '!=', '>=', '>', '<=', '<'),
                               ('NE', 'EQ', 'LT', 'LE', 'GT', 'GE')):
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_' + inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_' + inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_' + inv_replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_' + inv_replacement + '_M'
240
241
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().  They are the integers 1 through 5.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
249
250
# Cache of compiled regular expressions, keyed by the pattern string.
_regexp_compile_cache = {}


def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: A regular expression, as a string.
    s: The string to match against; matching is anchored at its start.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  # re.compile is the public entry point; sre_compile is an internal module
  # that newer Python versions deprecate.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
262
263
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: A regular expression, as a string.
    s: The string to search; the pattern may match anywhere in it.

  Returns:
    The match object, or None if the pattern is not found.
  """
  # The caching is deliberately inlined here rather than shared with Match.
  # re.compile is the public entry point; sre_compile is an internal module
  # that newer Python versions deprecate.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
269
270
class _IncludeState(dict):
  """Records where headers were included and checks their ordering.

  The dict part of an _IncludeState maps an include filename to the line
  number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, in order,
  passing one of the _XXX_HEADER type constants.  A header that shows up
  out of order is reported through a non-empty error message returned by
  that call.
  """
  # Sections a file moves through.  self._section never moves backwards:
  # CheckNextIncludeOrder() either advances it or returns an error message.
  (_INITIAL_SECTION,
   _MY_H_SECTION,
   _C_SECTION,
   _CPP_SECTION,
   _OTHER_H_SECTION) = range(5)

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    super(_IncludeState, self).__init__()
    self._section = self._INITIAL_SECTION

  def CheckNextIncludeOrder(self, header_type):
    """Checks the next header against the current section and advances.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.  The section is left
      untouched when an error message is returned.
    """
    # Built up front, before the section is updated, so that it names the
    # section we were in when the header was seen.
    complaint = 'Found %s after %s' % (self._TYPE_NAMES[header_type],
                                       self._SECTION_NAMES[self._section])

    if header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # Never an error: once past the "my header" section such a header is
      # simply treated as an "other" header.
      if self._section > self._MY_H_SECTION:
        self._section = self._OTHER_H_SECTION
      else:
        self._section = self._MY_H_SECTION
      return ''

    if header_type == _C_SYS_HEADER:
      if self._section > self._C_SECTION:
        return complaint
      self._section = self._C_SECTION
      return ''

    if header_type == _CPP_SYS_HEADER:
      if self._section > self._CPP_SECTION:
        return complaint
      self._section = self._CPP_SECTION
      return ''

    assert header_type == _OTHER_HEADER
    self._section = self._OTHER_H_SECTION
    return ''
354
355
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    self.filters = []       # filters to apply when emitting error messages

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.
               Whitespace around a filter and empty entries (such as the
               one produced by a trailing comma) are ignored.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
                  The previously installed filters are kept in that case.
    """
    parsed_filters = []
    if filters:
      for filt in filters.split(','):
        filt = filt.strip()
        if not filt:
          continue
        if not (filt.startswith('+') or filt.startswith('-')):
          raise ValueError('Every filter in --filter must start with + or -'
                           ' (%s does not)' % filt)
        parsed_filters.append(filt)
    # Only install the new list once every entry has been validated, so a
    # bad argument cannot leave unsigned filters behind.
    self.filters = parsed_filters

  def ResetErrorCount(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0

  def IncrementErrorCount(self):
    """Bumps the module's error statistic."""
    self.error_count += 1
409
410
# The single module-wide state object; the helper functions below read and
# update it.
_cpplint_state = _CppLintState()
412
413
def _OutputFormat():
  """Returns the output format stored in the module-wide state."""
  state = _cpplint_state
  return state.output_format
417
418
def _SetOutputFormat(output_format):
  """Stores output_format as the module-wide output format."""
  state = _cpplint_state
  state.SetOutputFormat(output_format)
422
423
def _VerboseLevel():
  """Returns the verbosity level stored in the module-wide state."""
  state = _cpplint_state
  return state.verbose_level
427
428
def _SetVerboseLevel(level):
  """Changes the module-wide verbosity and hands back the old value.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before the call.
  """
  previous_level = _cpplint_state.SetVerboseLevel(level)
  return previous_level
432
433
def _Filters():
  """Returns the list of output filters held by the module-wide state."""
  state = _cpplint_state
  return state.filters
437
438
def _SetFilters(filters):
  """Installs the module-wide error-message filters.

  The filters decide whether a given error message gets emitted.  This
  simply forwards to the SetFilters() method of the module-wide state.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  state = _cpplint_state
  state.SetFilters(filters)
450
451
class _FunctionState(object):
  """Remembers the function being scanned and how many body lines it has."""

  # Base line limits; Check() doubles them once per verbosity level.
  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # used for functions whose name starts with TEST/Test.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Starts tracking a new function body, resetting the line count.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current body; a no-op outside of a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports a readability/fn_size error if the body has too many lines.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    else:
      base_trigger = self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The confidence grows by one each time the size doubles relative to
    # the base trigger, and is capped at 5.
    size_ratio = self.lines_in_function / base_trigger
    error_level = min(int(math.log(size_ratio, 2)), 5)
    details = (self.current_function, self.lines_in_function, trigger)
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).' % details)

  def End(self):
    """Stops analyzing the function body; the line count is left as is."""
    self.in_a_function = False
506
507
class _IncludeError(Exception):
  """Exception type for problems with the order of includes in a file."""
511
512
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with backslashes turned into slashes.

    This makes Windows paths look like Unix ones.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the top of the SVN or git checkout when the file
      exists and such a checkout is found; otherwise the full name.
    """
    fullname = self.FullName()

    if not os.path.exists(fullname):
      # Don't know what to do; header guard warnings may be wrong...
      return fullname

    project_dir = os.path.dirname(fullname)

    if os.path.exists(os.path.join(project_dir, ".svn")):
      # If there's a .svn file in the current directory, we look up the
      # directory tree for the top of the SVN checkout.  The walk also stops
      # at the filesystem root, where dirname() returns its argument.
      root_dir = project_dir
      one_up_dir = os.path.dirname(root_dir)
      while (one_up_dir != root_dir and
             os.path.exists(os.path.join(one_up_dir, ".svn"))):
        root_dir = one_up_dir
        one_up_dir = os.path.dirname(one_up_dir)

      prefix = os.path.commonprefix([root_dir, project_dir])
      # Strip the separator explicitly: the prefix already ends with '/'
      # when the checkout root is the filesystem root.
      return fullname[len(prefix):].lstrip('/')

    # Not SVN? Try to find a git top level directory by searching up from the
    # current path.
    root_dir = project_dir
    while (root_dir != os.path.dirname(root_dir) and
           not os.path.exists(os.path.join(root_dir, ".git"))):
      root_dir = os.path.dirname(root_dir)
    if os.path.exists(os.path.join(root_dir, ".git")):
      prefix = os.path.commonprefix([root_dir, project_dir])
      return fullname[len(prefix):].lstrip('/')

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative name without the extension: 'directory/basename'."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """True if the extension is one of .c, .cc, .cpp or .cxx."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
596
597
def _ShouldPrintError(category, confidence):
  """Returns true iff confidence >= verbose, and category passes filter.

  Args:
    category: The category of the error, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.

  Returns:
    True if the error should be printed, False otherwise.
  """
  # An error can be dropped for two reasons.  First reason: it is not
  # confident enough for the current verbosity level.
  if confidence < _cpplint_state.verbose_level:
    return False

  # Second reason: the filters say so.  They are applied left to right, so
  # the last filter whose prefix matches the category decides.
  suppressed = False
  for rule in _Filters():
    if rule.startswith('-'):
      if category.startswith(rule[1:]):
        suppressed = True
    elif rule.startswith('+'):
      if category.startswith(rule[1:]):
        suppressed = False
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
619
620
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr, unless it is filtered out.

  Each report says where the error was found and how confident we are
  that it is a legitimate style regression, rather than a
  misidentification or a use that's sometimes justified.  Errors that are
  reported also bump the module-wide error count.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
              falls under: "whitespace", say, or "runtime".  Categories
              may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
                the error, with 5 meaning that we are certain of the problem,
                and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  # Verbosity and the category filters may both veto the message.
  if not _ShouldPrintError(category, confidence):
    return

  _cpplint_state.IncrementErrorCount()
  # Only the location part differs between the two formats.
  if _cpplint_state.output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(template % (filename, linenum, message, category,
                               confidence))
649
650
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that start and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding spaces to remove, so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
 /\*.*\*/\s+|
 \s+/\*.*\*/(?=\W)|
 /\*.*\*/)""", re.VERBOSE)
671
672
def IsCppString(line):
  """Tells whether the text right after 'line' would be inside a string.

  The check counts the double quotes on the line, ignoring escaped ones and
  the character literal '"'; an odd count means a string is still open.
  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralize escaped backslashes first, so that \\" is not taken for \".
  unescaped = line.replace(r'\\', 'XX')
  all_quotes = unescaped.count('"')
  escaped_quotes = unescaped.count(r'\"')
  quote_char_literals = unescaped.count("'\"'")
  return (all_quotes - escaped_quotes - quote_char_literals) % 2 == 1
688
689
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the line on which the next multi-line comment begins.

  Args:
    lines: The lines of the file.
    lineix: The index of the line to start searching from.

  Returns:
    The index of the first line at or after lineix that starts with '/*'
    (ignoring surrounding whitespace) and has no '*/' after it on that
    line, or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Only report this marker if the comment goes beyond this line.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
699
700
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The lines of the file.
    lineix: The index of the line to start searching from.

  Returns:
    The index of the first line at or after lineix that ends with '*/'
    (ignoring surrounding whitespace), or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
708
709
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with placeholder comments.

  Args:
    lines: The lines of the file; modified in place.
    begin: The index of the first line to overwrite.
    end: The index just past the last line to overwrite.
  """
  # A '// dummy' placeholder keeps each line non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
716
717
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: The lines of the file; modified in place.
    error: The function to call with any errors found.
  """
  total = len(lines)
  search_from = 0
  while search_from < total:
    comment_begin = FindNextMultiLineCommentStart(lines, search_from)
    if comment_begin >= total:
      # No further multi-line comment.
      return
    comment_end = FindNextMultiLineCommentEnd(lines, comment_begin)
    if comment_end >= total:
      # Reported line numbers are one-based.
      error(filename, comment_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    search_from = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_begin, search_from)
732
733
def CleanseComments(line):
  """Strips a trailing //-comment and any one-line /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slashes_at = line.find('//')
  # A '//' that sits inside a string constant does not start a comment.
  if slashes_at != -1 and not IsCppString(line[:slashes_at]):
    line = line[:slashes_at]
  # Then drop the /* ... */ comments that remain.
  without_c_comments = _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
  return without_c_comments
748
749
class CleansedLines(object):
  """Keeps three versions of every line, each preprocessed differently.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments, and
  3) raw_lines member contains all the lines without processing.
  All three members are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.  A line that _RE_PATTERN_INCLUDE
      matches is returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    for pattern, collapsed_form in (
        (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
        (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
        (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""')):
      elided = pattern.sub(collapsed_form, elided)
    return elided
793
794
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.  The scan
  starts at pos and tracks the nesting depth of that one bracket type, so
  brackets before pos and unrelated brackets later on are not confused with
  the matching close.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close. Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum (the last line scanned when no close is
    found).
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  closers = {'(': ')', '[': ']', '{': '}'}
  if startchar not in closers:
    return (line, clean_lines.NumLines(), -1)
  endchar = closers[startchar]

  depth = 0
  scan_from = pos
  while True:
    for index in range(scan_from, len(line)):
      char = line[index]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          # This is the close that balances the bracket at the start.
          return (line, linenum, index + 1)
    # Not closed on this line: continue at the start of the next one, if
    # there is a next one.
    linenum += 1
    if linenum >= clean_lines.NumLines():
      return (line, clean_lines.NumLines(), -1)
    line = clean_lines.elided[linenum]
    scan_from = 0
832
833
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file, with
           a dummy line at index 0.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  # range() rather than xrange(): at most ten items, and it exists on both
  # Python 2 and Python 3.
  for line in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[line], re.I):
      break
  else:  # means no copyright line was found
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found. '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
845
846
def GetHeaderGuardCPPVariable(filename):
  """Builds the preprocessor variable expected as a file's header guard.

  The variable is the repository-relative name of the file in upper case,
  with every '-', '.', '/' and whitespace character turned into '_', plus
  a trailing '_'.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.
  """
  repository_name = FileInfo(filename).RepositoryName()
  sanitized = re.sub(r'[-./\s]', '_', repository_name)
  return sanitized.upper() + '_'
861
862
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef/#define/#endif header guard of a header file.

  Logs an error if no #ifndef header guard is present.  Otherwise checks
  that the guard and the comment on the last #endif use the variable built
  from the full pathname.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  expected_guard = GetHeaderGuardCPPVariable(filename)

  guard = None           # argument of the first #ifndef
  guard_linenum = 0
  defined = None         # argument of the first #define
  last_endif = None      # entire text of the last #endif line
  last_endif_linenum = 0
  for index, text in enumerate(lines):
    tokens = text.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      if directive == '#ifndef' and not guard:
        guard = argument
        guard_linenum = index
      if directive == '#define' and not defined:
        defined = argument
    if text.startswith('#endif'):
      last_endif = text
      last_endif_linenum = index

  if not (guard and defined and guard == defined):
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility: that spelling is reported with confidence 0
  # instead of 5.
  legacy_guard = expected_guard + '_'

  if guard != expected_guard:
    if guard == legacy_guard:
      confidence = 0
    else:
      confidence = 5
    error(filename, guard_linenum, 'build/header_guard', confidence,
          '#ifndef header guard has wrong style, please use: %s' %
          expected_guard)

  if last_endif != ('#endif // %s' % expected_guard):
    if last_endif == ('#endif // %s' % legacy_guard):
      confidence = 0
    else:
      confidence = 5
    error(filename, last_endif_linenum, 'build/header_guard', confidence,
          '#endif line should be "#endif // %s"' % expected_guard)
920
921
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  A replacement character means the file either held invalid UTF-8
  (likely) or literally contained U+FFFD (which it shouldn't).  If the
  invalid UTF-8 sat next to a newline, the reported line numbers may be
  off.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  for index, text in enumerate(lines):
    if replacement_char not in text:
      continue
    error(filename, index, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
939
940
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # lines was produced by appending two newlines to the original file
  # contents and splitting on \n.  A file that ends in \n therefore has
  # an empty string as its last-but-two... i.e. second-to-last, element.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
957
958
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings left open on a line.

  A /* ... */ comment confined to one line is fine (e.g. inside macros);
  otherwise // comments are preferred.  Strings may legally continue
  across lines with a trailing backslash, but this lint program copes
  badly with both constructs, so both are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop escaped backslashes (\\) first: they are harmless, and their
  # second slash could otherwise be mistaken for the start of a \" escape.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
995
996
# Pairs of (thread-unsafe call prefix, suggested replacement prefix).  Each
# replacement is the same function name with an '_r' suffix, so the table
# is generated from the base names.
threading_list = tuple(
    [(_name + '(', _name + '_r(')
     for _name in ('asctime', 'ctime', 'getgrgid', 'getgrnam', 'getlogin',
                   'getpwnam', 'getpwuid', 'gmtime', 'localtime', 'rand',
                   'readdir', 'strtok', 'ttyname')])
1012
1013
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  At most one error is reported per thread-unsafe function per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    # Walk over every occurrence, not just the first one: in
    # "my_rand() + rand()" the first hit is part of another identifier
    # and must not hide the genuine call that follows it.
    ix = line.find(single_thread_function)
    while ix >= 0:
      # A hit counts only when it is not the tail of a longer identifier
      # and not a member access (preceded by '.' or '->').
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1039
1040
1041class _ClassInfo(object):
1042 """Stores information about a class."""
1043
1044 def __init__(self, name, linenum):
1045 self.name = name
1046 self.linenum = linenum
1047 self.seen_open_brace = False
1048 self.is_derived = False
1049 self.virtual_method_linenumber = None
1050 self.has_virtual_destructor = False
1051 self.brace_depth = 0
1052
1053
1054class _ClassState(object):
1055 """Holds the current state of the parse relating to class declarations.
1056
1057 It maintains a stack of _ClassInfos representing the parser's guess
1058 as to the current nesting of class declarations. The innermost class
1059 is at the top (back) of the stack. Typically, the stack will either
1060 be empty or have exactly one entry.
1061 """
1062
1063 def __init__(self):
1064 self.classinfo_stack = []
1065
1066 def CheckFinished(self, filename, error):
1067 """Checks that all classes have been completely parsed.
1068
1069 Call this when all lines in a file have been processed.
1070 Args:
1071 filename: The name of the current file.
1072 error: The function to call with any errors found.
1073 """
1074 if self.classinfo_stack:
1075 # Note: This test can result in false positives if #ifdef constructs
1076 # get in the way of brace matching. See the testBuildClass test in
1077 # cpplint_unittest.py for an example of this.
1078 error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
1079 'Failed to find complete declaration of class %s' %
1080 self.classinfo_stack[0].name)
1081
1082
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, check for constructor/destructor style violations as it
  is very convenient to do so while checking for gcc-2 compliance.

  This function is called once per line and keeps its class-tracking state
  in class_state between calls: it pushes a _ClassInfo when a class or
  struct declaration starts and pops it when the braces balance again (or
  when the declaration turns out to be a forward declaration).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, error category, error level, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  # A backslash followed by %, [, ( or { anywhere after a quote character.
  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  # A type or cv-qualifier keyword followed by a storage-class keyword.
  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(auto|register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style
  # guidelines. Tracking is very dependent on the code matching Google
  # style guidelines, but it seems to perform well enough in testing
  # to be a worthwhile addition to the checks.
  classinfo_stack = class_state.classinfo_stack
  # Look for a class declaration
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if class_decl_match:
    # group(3) is the (possibly ::-qualified) class name.
    classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))

  # Everything else in this function uses the top of the stack if it's
  # not empty.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # If the opening brace hasn't been seen look for it and also
  # parent class declarations.
  if not classinfo.seen_open_brace:
    # If the line has a ';' in it, assume it's a forward declaration or
    # a single-line class declaration, which we won't process.
    if line.find(';') != -1:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = (line.find('{') != -1)
    # Look for a bare ':' (one that is not part of a '::'); it is taken
    # to introduce a base-class list.
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  # The single argument is one with no commas or parentheses in it.
  args = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
               % re.escape(base_classname),
               line)
  # 'void' and a (const) reference to the class itself -- i.e. a copy
  # constructor -- are not reported.
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    # NOTE(review): unlike the patterns above, base_classname is not
    # passed through re.escape here; harmless while the name can only
    # contain \w characters.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  brace_depth = classinfo.brace_depth
  brace_depth = brace_depth + line.count('{') - line.count('}')
  if brace_depth <= 0:
    classinfo = classinfo_stack.pop()
    # Try to detect missing virtual destructor declarations.
    # For now, only warn if a non-derived class with virtual methods lacks
    # a virtual destructor. This is to make it less likely that people will
    # declare derived virtual destructors without declaring the base
    # destructor virtual.
    if ((classinfo.virtual_method_linenumber is not None) and
        (not classinfo.has_virtual_destructor) and
        (not classinfo.is_derived)):  # Only warn for base classes
      error(filename, classinfo.linenum, 'runtime/virtual', 4,
            'The class %s probably needs a virtual destructor due to '
            'having virtual method(s), one declared at line %d.'
            % (classinfo.name, classinfo.virtual_method_linenumber))
  else:
    # Still inside the class: remember the depth for the next line.
    classinfo.brace_depth = brace_depth
1233
1234
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks the spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often sit inside if/for/while/switch expressions, which
  # follow their own, more liberal conventions.  When the line is such a
  # construct, only the text inside its parentheses is examined, so that
  # the stricter function-call rules can be applied to it; otherwise the
  # whole line is examined.
  control_flow_patterns = (r'\bif\s*\((.*)\)\s*{',
                           r'\bfor\s*\((.*)\)\s*{',
                           r'\bwhile\s*\((.*)\)\s*[{;]',
                           r'\bswitch\s*\((.*)\)\s*{')
  candidate = line
  for control_pattern in control_flow_patterns:
    found = Search(control_pattern, line)
    if found:
      candidate = found.group(1)
      break

  # Nothing is checked for text that still contains a control structure,
  # or that looks like a pointer/reference to a function or to an array.
  # The latter are recognized very simply as
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # assuming the contents of [] are short enough never to wrap.
  if (Search(r'\b(if|for|while|switch|return|delete)\b', candidate) or
      Search(r' \([^)]+\)\([^)]*(\)|,$)', candidate) or
      Search(r' \([^)]+\)\[[^\]]+\]', candidate)):
    return

  # There should never be a space right inside the parens ("f( 3, 4 )"),
  # except between nested parens ( (a+b) + c ).
  if Search(r'\w\s*\(\s', candidate):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+[^(]', candidate):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Nor a space before a ( that follows a character legal in a function
  # name (alnum + _), unless the line starts a macro or a typedef.
  space_before_paren = Search(r'\w\s+\(', candidate)
  if space_before_paren and not Search(r'#\s*define|typedef', candidate):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', candidate):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
1293
1294
def IsBlankLine(line):
  """Tells whether a line is blank.

  A line counts as blank when it is empty or made up solely of
  whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1308
1309
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles one line and classifies it as the start of a function
  (function_state.Begin), the closing brace of a function
  (function_state.Check and function_state.End), or a non-blank line to be
  counted (function_state.Count).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  raw = clean_lines.raw_lines
  raw_line = raw[linenum]
  # Text of the candidate function header, accumulated across lines so
  # that TEST(...) parameters can be recovered below.
  joined_line = ''

  starting_func = False
  # Match is anchored at column 0, so only unindented text can start a
  # function here.
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until something settles whether
    # this is a declaration, a trivial function, or a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        # Name reported for the function: the identifier (with any ::
        # qualifiers) right before the first '(' on the starting line.
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    # NOLINT is looked up in the raw line, not in the cleaned `line`.
    if not Search(r'\bNOLINT\b', raw_line):
      function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1379
1380
# Matches a TODO at the very start of a // comment.  Groups:
#   1: the whitespace between '//' and 'TODO' (possibly empty),
#   2: the optional '(username)' part, parentheses included,
#   3: the optional single whitespace character (or empty match at end of
#      string) that follows the optional ':'.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
1382
1383
def CheckComment(comment, filename, linenum, error):
  """Looks for the usual formatting mistakes in a TODO comment.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before_todo, owner, space_after_todo = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before_todo) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Only a single space, or nothing because the comment ends there, is
  # accepted.  The group is None when it did not take part in the match,
  # which is reported as well.
  if space_after_todo not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1412
1413
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't have too many
  blank lines in a row.

  The blank-line and comment checks look at the raw line; all later
  checks use the elided line, with comments and strings removed.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason. This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  if IsBlankLine(line):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block. Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are
      # indented 4 spaces (because they did not fit in a 80 column line when
      # placed on the same line as the function name). We also check for the
      # case where the previous line is indented 6 spaces, which may happen
      # when the initializers of a constructor do not fit into a 80 column
      # line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The 5-character prefix must be exactly four spaces and the colon.
        # (Comparing it with a shorter literal could never match a real
        # initializer-list line, so the exception would never apply.)
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list. We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have
        # a closing paren, without the opening paren, followed by an opening
        # brace or colon (for initializer lists) we assume that it is the last
        # line of a function header. If we have a colon indented 4 spaces, it
        # is an initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block. Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block. Is this needed?')

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes. If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        match = Search(r'[=/-]{4,}\s*$', line[commentend:])
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement
  # is a raw string so that the backslash is not an invalid string escape;
  # its value is unchanged.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not. Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned. It's hard to tell,
  # though, so we punt on this one for now. TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a). The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want a different number of spaces after ( than before ).
  # Exception: "for ( ; foo; bar)" is allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4))):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, you should have spaces before your braces.
  # And since you should never have braces at the beginning of a line, this is
  # an easy test.
  if Search(r'[^ (]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1637
1638
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank line above the given line.

  The search walks upwards through clean_lines.elided, starting just above
  linenum and stopping at index 0.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple describing the nearest preceding line
    for which IsBlankLine() is false. When there is no such line the tuple
    is ('', -1).
  """
  candidate = linenum
  while candidate > 0:
    candidate -= 1
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  # Ran off the top of the file without finding any content.
  return ('', -1)
1660
1661
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Reports, through the error callback:
    * an opening brace alone on a line (whitespace/braces),
    * an 'else' that starts a line right after a closing brace
      (whitespace/newline),
    * an 'else' with a brace on only one side (readability/braces),
    * an else or do clause sharing a line with its body (whitespace/newline),
    * a redundant ';' after a closing brace (readability/braces).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):  # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:  # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:  # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  # To do so, preceding lines are glued onto the front of 'line' for as long
  # as the combined text still starts with an indented '{...};' and the line
  # being prepended carries no semicolon of its own.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    if prevlinenum < 0:
      # Nothing left above us.  Without this guard the lookup would keep
      # returning ('', -1); prepending '' leaves 'line' unchanged, so the
      # loop condition could stay true forever.
      break
    if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1734
1735
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a comparison-specific macro.

  For instance CHECK(a == b) is a candidate for CHECK_EQ; the same idea
  applies to CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    A true value if the CHECK can be replaced with a more specific one,
    a false value otherwise.
  """

  # Decimal integers, hex integers, string literals and char literals,
  # tried in that order.
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only comparisons with something literal-looking on one side are
  # considered, since CHECK(x == iterator) won't compile with the specific
  # macros.  This misses some replaceable CHECKs, but that is less annoying
  # than producing extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  # CHECK(x == NULL) and friends are left alone because CHECK_EQ(x, NULL)
  # won't compile without a cast.  Compound conditions built with && or ||,
  # e.g. CHECK(a == b || c == d), are skipped as well.
  return Match(pattern, line) and not Search(r'NULL|&&|\|\|', line)
1769
1770
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific replacements for CHECK/EXPECT macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # The first entry of _CHECK_MACROS that occurs in the raw line decides
  # which family of replacement macros is suggested.
  raw_lines = clean_lines.raw_lines
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_lines[linenum]:
      current_macro = candidate
      break
  if not current_macro:
    # None of the known macros is on this line, so there is nothing to do.
    return

  code = clean_lines.elided[linenum]  # comments and strings removed

  # Report at most one suggestion: the first operator that qualifies.
  for comparison in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(comparison, current_macro, code):
      continue
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[current_macro][comparison],
              current_macro, comparison))
    break
1802
1803
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For a non-text string
    (a Python 2 byte string, or bytes on Python 3) this is simply len(line).
  """
  # Python 2 has a dedicated 'unicode' type.  On Python 3 that name does not
  # exist and every str is already text, so fall back to str there instead
  # of failing with a NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if isinstance(line, text_type):
    width = 0
    # Normalising to NFC first composes base + combining sequences where a
    # precomposed character exists.
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        # Wide and fullwidth characters take two columns.
        width += 2
      elif not unicodedata.combining(uc):
        # Combining marks take no column of their own.
        width += 1
    return width
  else:
    return len(line)
1824
1825
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Many of those rules (naming, comment style) are hard to verify
  mechanically.  What is checked here: tabs, trailing whitespace, odd
  indentation, unindented labels, line length and multiple statements on
  one line.  The brace, spacing and CHECK-macro checks are then delegated
  to CheckBraces, CheckSpacing and CheckCheck.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  line = clean_lines.raw_lines[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  # Number of leading space characters (only ' ', not other whitespace).
  initial_spaces = len(line) - len(line.lstrip(' '))

  # At most one of the following three whitespace complaints is raised.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for labels
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  # Labels should always be indented at least one space.
  elif (initial_spaces == 0 and not line.startswith('//') and
        Search(r'[^:]:\s*$', line)):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor, '
          'the colon should be on the line after the definition header.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    for template in ('#ifndef %s', '#define %s', '#endif // %s'):
      if line.startswith(template % cppvar):
        is_header_guard = True
        break

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  exempt_from_length = (line.startswith('#include') or is_header_guard or
                        Match(r'^\s*//\s*http(s?)://\S*$', line))
  if not exempt_from_length:
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # More than one ';' usually means several statements on one line.
  # for loops are allowed two ;'s (and may run over two lines).
  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # The previous line mentions 'for' but holds no ';' yet, so this line is
    # taken to be the remainder of that for header.
    continues_for_header = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
1920
1921
# Matches a quoted include whose path has no '/' in it, i.e. a bare "bar.h"
# rather than the directory-qualified "foo/bar.h".
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')

# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')

# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
1930
1931
def _DropCommonSuffixes(filename):
  """Strips a well-known trailing marker such as _test.cc or -inl.h.

  A marker is only removed when it is joined to the rest of the name by
  '-' or '_'.  If none applies, just the file extension is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  for ending in ('test.cc', 'regtest.cc', 'unittest.cc',
                 'inl.h', 'impl.h', 'internal.h'):
    # Index of the character that would separate the stem from the ending.
    cut = len(filename) - len(ending) - 1
    if cut < 0 or not filename.endswith(ending):
      continue
    if filename[cut] in ('-', '_'):
      return filename[:cut]
  return os.path.splitext(filename)[0]
1957
1958
def _IsTestFilename(filename):
  """Determines if the given filename has a suffix that identifies it as a test.

  The recognised suffixes are '_test.cc', '_unittest.cc' and '_regtest.cc'.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  # str.endswith already yields a bool, so the result can be returned
  # directly.  (The tuple form of endswith is avoided on purpose: it is not
  # available on the Python 2.4 interpreter this script targets.)
  return (filename.endswith('_test.cc') or
          filename.endswith('_unittest.cc') or
          filename.endswith('_regtest.cc'))
1974
1975
def _ClassifyInclude(fileinfo, include, is_system):
  """Assigns an #include to one of the header categories.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # A header counts as C++ when it is listed in either _STL_HEADERS or
  # _CPP_HEADERS; any other <...> include is treated as a C system header.
  is_cpp_h = include in _STL_HEADERS or include in _CPP_HEADERS
  if is_system:
    if is_cpp_h:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the file being linted and the include share a basename once common
  # suffixes are dropped, and the include lives in the same directory (or in
  # the sibling 'public' directory), the include most likely belongs to the
  # file being linted.
  own_dir, own_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  inc_dir, inc_base = os.path.split(_DropCommonSuffixes(include))
  if own_base == inc_base:
    if inc_dir == own_dir:
      return _LIKELY_MY_HEADER
    if inc_dir == os.path.normpath(own_dir + '/../public'):
      return _LIKELY_MY_HEADER

  # Sharing just the first basename component means the file might be
  # implementing the include.  Such a header is allowed to come first, but
  # nobody complains if it does not.
  own_first = _RE_FIRST_COMPONENT.match(own_base)
  inc_first = _RE_FIRST_COMPONENT.match(inc_base)
  if own_first and inc_first and own_first.group(0) == inc_first.group(0):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2034
2035
erg@google.coma87abb82009-02-24 01:41:01 +00002036
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  The checks cover, in order: include style, duplicates and ordering;
  stream headers; non-const reference parameters; casts; static/global
  strings; RTTI; self-initialization; C integer types; printf-family and
  memset misuse; using-directives; variable-length arrays; placement of the
  DISALLOW_* macros; and unnamed namespaces in headers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  # get rid of comments
  comment_elided_line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(comment_elided_line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(comment_elided_line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))

  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')

  # Check for non-const references in functions.  This is tricky because &
  # is also used to take the address of something.  We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's.  They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname.  For the const
  # version, we're willing for const to be before typename or after.
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  # A warning is due when there are more reference parameters overall than
  # there are const-qualified ones (const before or after the type).
  reference_params = len(
      re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline))
  const_before_type = len(
      re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                 r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline))
  const_after_type = len(
      re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                 fnline))
  if reference_params > const_before_type + const_after_type:

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.
    if not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style. '
            'Use static_cast<%s>(...) instead' %
            match.group(1))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases.  Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>. If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast. Google doesn't support "
          'RTTI.')

  # The backreference requires the same identifier (ending in '_') both
  # before and inside the parentheses, e.g. foo_(foo_).
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match:
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    # The last alternative used to read '>>]'.  The size expression never
    # contains ']' (checked above), so '>>' was never treated as a delimiter
    # and survived as a token that failed every constant test below.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2337
2338
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports C-style casts found by searching the line for the pattern.

  Because the text looks so similar, sizeof(type) and unnamed function
  parameters are recognised here too; each gets its own message instead of
  the cast warning.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.
  """
  cast = Search(pattern, line)
  if not cast:
    return

  # e.g., sizeof(int): look at what comes before the opening parenthesis.
  preceding = line[0:cast.start(1) - 1]
  if Match(r'.*sizeof\s*$', preceding):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  trailing = line[cast.end(0):]

  # What follows the parenthesised type tells a declaration from a cast:
  #   ')'           function pointer passed as an argument,
  #                 eg, void foo(void (*bar)(int));
  #   ';' '{' etc.  plain function declaration or function pointer typedef,
  #                 eg, void foo(int); or void foo(int) const;
  #   '='           function pointer assignment,
  #                 eg, void *(*foo)(int) = ...
  #
  # Right now, this will only catch cases where there's a single argument,
  # and it's unnamed.  It should probably be expanded to check for multiple
  # arguments with some unnamed.
  declaration = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', trailing)
  if declaration:
    terminator = declaration.group(3)
    # Stay quiet only when the declaration ends in something other than ';'
    # and the raw line contains a '/*' comment opener.
    tolerated = terminator and terminator != ';' and '/*' in raw_line
    if not tolerated:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast.group(1)))
2392
2393
# Pairs of (header, template names declared by that header). Used below to
# build the regular expressions that detect uses of each template.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        # Arithmetic functors.
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        # Comparison functors.
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        # Logical functors and negators.
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        # Binders and function adaptors.
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        # Member function adaptors.
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
        )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template to headers whose inclusion is accepted as providing it,
# even though those headers are never the ones suggested in a warning.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, function name, '<algorithm>') triples. Each pattern
# matches max<type>(..., ...) and max(..., ...), but not foo->max, foo.max
# or type::max().
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# (compiled pattern, 'name<>', header) triples, one for every template name
# listed in _HEADERS_CONTAINING_TEMPLATES, in table order.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
2455
2456
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  We only check headers. Implementation files (.cc and .cpp) are skipped:
  they should be able to depend on their respective header files for
  includes, and there is no simple way of producing this logic here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
  """
  # ProcessFile accepts both .cc and .cpp as implementation files, so both
  # are exempt here. Two endswith() calls rather than a tuple argument keep
  # this working on the old Python named in the file's shebang line.
  if filename.endswith('.cc') or filename.endswith('.cpp'):
    return

  # Maps a header name to the last (line number, entity) that requires it.
  # Example of required: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    # Skip empty lines and lines that start with '#'.
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    if _RE_PATTERN_STRING.search(line):
      required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Every template pattern needs a '<', so lines without one cannot
    # match. This is purely a speed-up; no semantics are changed.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # All the lines have been processed, report the errors found.
  for header_name, (linenum, template) in required.items():
    # Some templates are considered covered when any one of a set of other
    # headers is included (e.g. pair<> when a map header is present).
    accepted = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(template, ())
    if [name for name in accepted if name in include_state]:
      continue
    if header_name.strip('<>"') not in include_state:
      error(filename, linenum, 'build/include_what_you_use', 4,
            'Add #include ' + header_name + ' for ' + template)
2514
2515
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs the per-line checks on one line of the file.

  The function-length check always runs. The remaining checks are skipped
  when the raw text of the line contains the word NOLINT.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance holding the file; its raw_lines
                 attribute is consulted for the NOLINT marker.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported. The checks in this file
           call it as error(filename, linenum, category, confidence, message).
  """
  unfiltered = clean_lines.raw_lines
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  if not Search(r'\bNOLINT\b', unfiltered[line]):
    # No NOLINT on this line, so run everything else.
    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
    CheckStyle(filename, clean_lines, line, file_extension, error)
    CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                  error)
    CheckForNonStandardConstructs(filename, clean_lines, line,
                                  class_state, error)
    CheckPosixThreading(filename, clean_lines, line, error)
2546
2547
def ProcessFileData(filename, file_extension, lines, error):
  """Performs lint checks and reports any errors to the given error function.

  Whole-file checks run around a pass that feeds every line to ProcessLine.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported; it is handed on to every
           individual check.
  """
  # Sentinel comment lines are put at both ends of the file.
  leading_marker = ['// marker so line numbers and indices both start at 1']
  trailing_marker = ['// marker so line numbers end in a known way']
  lines = leading_marker + lines + trailing_marker

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
2584
2585
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Reads the file (or stdin), strips trailing carriage returns, and runs the
  lint checks on it. Files that cannot be read, or whose extension is not
  cc, cpp or h, are reported on stderr and skipped.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report. Every error of confidence
    >= verbose_level will be reported. 0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin. Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # Close the handle explicitly instead of leaving it to the garbage
      # collector. The nested try/finally (rather than try/except/finally
      # or "with") keeps this valid on old Python 2 interpreters.
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source.read().split('\n')
      finally:
        source.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for linenum, text in enumerate(lines):
      if text.endswith('\r'):
        lines[linenum] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
2647
2648
def PrintUsage(message):
  """Writes the usage text to stderr and exits.

  Never returns: exits with a fatal-error string when a message is given,
  and with status 1 otherwise.

  Args:
    message: The optional error message.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
2660
2661
def PrintCategories():
  """Writes the list of error categories to stderr and exits with status 0.

  These are the categories used to filter messages via --filter.
  """
  stream = sys.stderr
  stream.write(_ERROR_CATEGORIES)
  sys.exit(0)
2669
2670
def ParseArguments(args):
  """Parses the command line arguments.

  This sets the output format, verbosity level and filters as side-effects.
  Invalid input (unknown options, a bad --output value, a non-integer
  --verbose value, or no filenames) ends the program through PrintUsage;
  an empty --filter value ends it through PrintCategories.

  Args:
    args: The command line arguments:

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  # Start from the currently configured values so that options which are
  # not given on the command line leave them unchanged.
  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a malformed level as a usage error rather than letting the
      # ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The --verbose level must be an integer.')
    elif opt == '--filter':
      filters = val
      # "--filter=" with no value lists the available categories.
      if not filters:
        PrintCategories()

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)

  return filenames
2714
2715
def main():
  """Lints every file named on the command line and exits.

  The process exits with a true status when at least one error was counted
  and a false status otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCount()
  for filename in filenames:
    ProcessFile(filename, _cpplint_state.verbose_level)
  sys.stderr.write('Total errors found: %d\n' % _cpplint_state.error_count)

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


# Run the linter only when executed as a script, not when imported.
if __name__ == '__main__':
  main()