blob: b5493b6d986443c7650e169eb160d0c4642d86a2 [file] [log] [blame]
erg@google.com720121a2012-05-11 16:31:47 +00001#!/usr/bin/python
erg@google.com4e00b9a2009-01-12 23:05:11 +00002#
erg@google.com8f91ab22011-09-06 21:04:45 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
erg@google.com4e00b9a2009-01-12 23:05:11 +000031"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style. It does not attempt to fix
35up these problems -- the point is to educate. It does also not
36attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
erg@google.comd350fe52013-01-14 17:51:48 +000045import copy
erg@google.com4e00b9a2009-01-12 23:05:11 +000046import getopt
47import math # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
56_USAGE = """
57Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.comab53edf2013-11-05 22:23:37 +000058 [--counting=total|toplevel|detailed] [--root=subdir]
59 [--linelength=digits]
erg@google.com4e00b9a2009-01-12 23:05:11 +000060 <file> [file] ...
61
62 The style guidelines this tries to follow are those in
63 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
64
65 Every problem is given a confidence score from 1-5, with 5 meaning we are
66 certain of the problem, and 1 meaning it could be a legitimate construct.
67 This will miss some errors, and is not a substitute for a code review.
68
erg+personal@google.com05189642010-04-30 20:43:03 +000069 To suppress false-positive errors of a certain category, add a
70 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
71 suppresses errors of all categories on that line.
erg@google.com4e00b9a2009-01-12 23:05:11 +000072
73 The files passed in will be linted; at least one file must be provided.
74 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
75
76 Flags:
77
78 output=vs7
79 By default, the output is formatted to ease emacs parsing. Visual Studio
80 compatible output (vs7) may also be used. Other formats are unsupported.
81
82 verbose=#
83 Specify a number 0-5 to restrict errors to certain verbosity levels.
84
85 filter=-x,+y,...
86 Specify a comma-separated list of category-filters to apply: only
87 error messages whose category names pass the filters will be printed.
88 (Category names are printed with the message and look like
89 "[whitespace/indent]".) Filters are evaluated left to right.
90 "-FOO" and "FOO" means "do not print categories that start with FOO".
91 "+FOO" means "do print categories that start with FOO".
92
93 Examples: --filter=-whitespace,+whitespace/braces
94 --filter=whitespace,runtime/printf,+runtime/printf_format
95 --filter=-,+build/include_what_you_use
96
97 To see a list of all the categories used in cpplint, pass no arg:
98 --filter=
erg@google.coma868d2d2009-10-09 21:18:45 +000099
100 counting=total|toplevel|detailed
101 The total number of errors found is always printed. If
102 'toplevel' is provided, then the count of errors in each of
103 the top-level categories like 'build' and 'whitespace' will
104 also be printed. If 'detailed' is provided, then a count
105 is provided for each category like 'build/class'.
erg@google.com4d70a882013-04-16 21:06:32 +0000106
107 root=subdir
108 The root directory used for deriving header guard CPP variable.
109 By default, the header guard CPP variable is calculated as the relative
110 path to the directory that contains .git, .hg, or .svn. When this flag
111 is specified, the relative path is calculated from the specified
112 directory. If the specified directory does not exist, this flag is
113 ignored.
114
115 Examples:
116 Assuming that src/.git exists, the header guard CPP variables for
117 src/chrome/browser/ui/browser.h are:
118
119 No flag => CHROME_BROWSER_UI_BROWSER_H_
120 --root=chrome => BROWSER_UI_BROWSER_H_
121 --root=chrome/browser => UI_BROWSER_H_
erg@google.comab53edf2013-11-05 22:23:37 +0000122
123 linelength=digits
124 This is the allowed line length for the project. The default value is
125 80 characters.
126
127 Examples:
128 --linelength=120
erg@google.com4e00b9a2009-01-12 23:05:11 +0000129"""
130
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Each entry has the form '<top-level>/<detail>'; ParseNolintSuppressions
# checks NOLINT(category) comments against this list.
_ERROR_CATEGORIES = [
    'build/class',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/function',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/nul',
    'readability/streams',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/vlog',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo'
    ]
erg@google.com4e00b9a2009-01-12 23:05:11 +0000198
# The default state of the category filter.  This is overridden by the
# --filter= flag.  By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags).  All entries here should start with a '-' or '+', as in
# the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.come35f7652009-06-19 20:52:09 +0000204
erg@google.com4e00b9a2009-01-12 23:05:11 +0000205# We used to check for high-bit characters, but after much discussion we
206# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com8a95ecc2011-09-08 00:45:54 +0000207# hard-coded international strings, which belong in a separate i18n file.
erg@google.com4e00b9a2009-01-12 23:05:11 +0000208
erg@google.com4e00b9a2009-01-12 23:05:11 +0000209
# C++ headers: the set of header names treated as C++ headers, grouped below
# into legacy headers, the C++ library headers, and the C++ headers that wrap
# C library facilities.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
345
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> name of the replacement macro}.
# Every macro starts with an empty operator table that the loops below fill.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Positive assertions: the replacement uses the same comparison as the
# operator, e.g. CHECK with '==' maps to CHECK_EQ.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Negative assertions: the replacement uses the inverted comparison,
# e.g. EXPECT_FALSE with '==' maps to EXPECT_NE.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
377
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
# Group 1 captures the keyword itself; the lookahead requires that it is
# followed by a space, an opening parenthesis, or the end of the line.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
404
erg@google.com4e00b9a2009-01-12 23:05:11 +0000405
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0  # Outside of inline assembly block
_INSIDE_ASM = 1  # Inside inline assembly block
_END_ASM = 2  # Last line of inline assembly block
_BLOCK_ASM = 3  # The whole block is an inline assembly block

# Match start of assembly blocks: one of the asm keywords, optionally followed
# by volatile/__volatile__, then an opening brace or parenthesis.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
424
erg@google.com4e00b9a2009-01-12 23:05:11 +0000425
# Cache of compiled regular expressions, keyed by pattern string.  It is
# filled lazily by Match(), ReplaceAll() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1, when present, is the
# parenthesized category, parentheses included.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.  The key None
# collects the lines on which all categories are suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80
442
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on one line, if any.

  Looks for a NOLINT or NOLINT(category) comment in raw_line and registers
  the line number in the global suppression map.  A bare NOLINT or NOLINT(*)
  suppresses every category; an unknown category is reported through error.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  category = found.group(1)
  if category is None or category == '(*)':
    # No category, or the wildcard: suppress everything on this line.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  # Drop the surrounding parentheses captured by the regexp.
  if category.startswith('(') and category.endswith(')'):
    category = category[1:-1]

  if category not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
    return
  _error_suppressions.setdefault(category, set()).add(linenum)
erg+personal@google.com05189642010-04-30 20:43:03 +0000470
471
def ResetNolintSuppressions():
  """Empties the global NOLINT suppression map in place."""
  _error_suppressions.clear()
475
476
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment silences this category on this line.

  Reads the global suppression map maintained by
  ParseNolintSuppressions/ResetNolintSuppressions.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  # Check the category-specific entry first, then the "suppress all" entry
  # stored under the key None.
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
erg@google.com4e00b9a2009-01-12 23:05:11 +0000491
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression.
    s: str, the string to match; matching is anchored at the start of s.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # Compile through the public re API rather than the internal sre_compile
    # module, which is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
500
501
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # Compile through the public re API rather than the internal sre_compile
    # module, which is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
518
519
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression.
    s: str, the string to scan; the pattern may match at any position.

  Returns:
    The match object for the first match, or None if there is none.
  """
  if pattern not in _regexp_compile_cache:
    # Compile through the public re API rather than the internal sre_compile
    # module, which is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
525
526
class _IncludeState(dict):
  """Records where headers were included and checks the include order.

  The dict part maps an include filename to the line number on which that
  file was included.

  CheckNextIncludeOrder() is meant to be called once per header, in file
  order, with one of the _XXX_HEADER type constants.  When a header appears
  in an illegal position it returns a non-empty error message.
  """
  # self._section only ever moves forward through these values.  A C or C++
  # system header that would require moving backwards makes
  # CheckNextIncludeOrder report an error.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    self.ResetSection()

  def ResetSection(self):
    """Goes back to the initial section and forgets the last header."""
    # The current section.
    self._section = self._INITIAL_SECTION
    # The path of the last header seen in the current section.
    self._last_header = ''

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Normalizes a header path so it can be compared alphabetically.

    - turns a '-inl.h' suffix into '.h', since -inl headers are not required
      to come after the main header.
    - replaces "-" with "_" so they both compare the same.
    - lowercases everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    canonical = header_path.replace('-inl.h', '.h')
    canonical = canonical.replace('-', '_')
    return canonical.lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Tells whether a header sorts at or after the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      True if the header is in alphabetical order, or if the line before it
      is blank; False otherwise.
    """
    # When the section changes, _last_header is reset to the empty string,
    # which never sorts after the current header.
    if self._last_header <= header_path:
      return True
    # Out of order: tolerate it only when the previous line is blank, on the
    # assumption that the headers were grouped that way on purpose.
    return bool(Match(r'^\s*$', clean_lines.elided[linenum - 1]))

  def CheckNextIncludeOrder(self, header_type):
    """Checks the position of the next header and advances the section.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.
    """
    # Built before the section changes, so it names the section that the
    # header was found after.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    previous_section = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted_section = self._C_SECTION
      else:
        wanted_section = self._CPP_SECTION
      if previous_section > wanted_section:
        # System header after a later section: report it and restart the
        # alphabetical comparison.
        self._last_header = ''
        return error_message
      self._section = wanted_section
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # A header this file (possibly) implements only counts as such when it
      # comes first; anywhere later it is treated as an ordinary header.
      if previous_section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical order is only checked within a single section.
    if previous_section != self._section:
      self._last_header = ''

    return ''
662
663
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.  The new filter list is installed only if every entry is
    valid; on error the previous filters are left untouched.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    new_filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        new_filters.append(clean_filt)
    # Validate before assigning so that a bad flag cannot leave the state
    # holding an invalid filter list.
    for filt in new_filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    self.filters = new_filters

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: str, the full category of the error (eg "build/class").
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        # 'toplevel' counting only keeps the part before the first '/'.
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # items() works on both Python 2 and Python 3 (iteritems() is Python 2
    # only); sorting makes the output order deterministic.
    for category, count in sorted(self.errors_by_category.items()):
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000740
# The single module-wide _CppLintState instance; the module-level accessor
# functions below read and update it.
_cpplint_state = _CppLintState()
742
743
def _OutputFormat():
  """Returns the output format stored in the module-wide lint state."""
  state = _cpplint_state
  return state.output_format
747
748
def _SetOutputFormat(output_format):
  """Stores output_format as the output format of the module-wide state."""
  state = _cpplint_state
  state.SetOutputFormat(output_format)
752
753
def _VerboseLevel():
  """Returns the verbosity level stored in the module-wide lint state."""
  state = _cpplint_state
  return state.verbose_level
757
758
def _SetVerboseLevel(level):
  """Changes the module-wide verbosity and returns the previous level."""
  previous_level = _cpplint_state.SetVerboseLevel(level)
  return previous_level
762
763
def _SetCountingStyle(level):
  """Stores level as the error counting style of the module-wide state."""
  state = _cpplint_state
  state.SetCountingStyle(level)
767
768
def _Filters():
  """Returns the list of output filters held by the module-wide state."""
  state = _cpplint_state
  return state.filters
772
773
def _SetFilters(filters):
  """Replaces the error-message filters of the module-wide state.

  The filters decide whether a given error message is emitted.

  Args:
    filters: A string of comma-separated filters (eg "whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  state = _cpplint_state
  state.SetFilters(filters)
785
786
class _FunctionState(object):
  """Remembers the function being scanned and counts the lines of its body."""

  # Base line-count thresholds; Check() doubles them per verbosity level.
  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # test functions get a higher threshold.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Starts tracking a new function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current function, if inside one."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports the current function if its body has too many lines.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # Names starting with TEST or Test use the test threshold.
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    else:
      base_trigger = self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The level grows by one each time the size doubles relative to the base
    # threshold, and is capped at 5.
    error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    error_level = min(error_level, 5)
    message = ('Small and focused functions are preferred:'
               ' %s has %d non-comment lines'
               ' (error triggered by exceeding %d lines).' % (
                   self.current_function, self.lines_in_function, trigger))
    error(filename, linenum, 'readability/fn_size', error_level, message)

  def End(self):
    """Stops tracking the current function body."""
    self.in_a_function = False
841
842
class _IncludeError(Exception):
  """Indicates a problem with the include order in a file.

  Carries no state beyond what Exception already provides.
  """
  pass
846
847
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  # Directory entries whose presence marks the top of a checkout.
  _VCS_MARKERS = ('.git', '.hg', '.svn')

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file with every backslash replaced by '/'.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  @classmethod
  def _HasVcsMarker(cls, directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return any(os.path.exists(os.path.join(directory, marker))
               for marker in cls._VCS_MARKERS)

  @staticmethod
  def _StripRoot(fullname, root_dir):
    """Returns fullname made relative to root_dir, one of its parents."""
    # A filesystem root such as '/' already ends with the separator, so the
    # separator is stripped explicitly instead of assuming that exactly one
    # character sits between root_dir and the rest of the path.
    return fullname[len(root_dir):].lstrip('/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or FullName() when
      the file does not exist or no checkout root can be found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout.  The walk
        # also stops at the filesystem root, where dirname() is a fixed
        # point and the loop would otherwise never terminate.
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = one_up_dir
          one_up_dir = os.path.dirname(one_up_dir)
        return self._StripRoot(fullname, root_dir)

      # Not SVN <= 1.6?  Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = project_dir
      while (root_dir != os.path.dirname(root_dir) and
             not self._HasVcsMarker(root_dir)):
        root_dir = os.path.dirname(root_dir)

      if self._HasVcsMarker(root_dir):
        return self._StripRoot(fullname, root_dir)

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Returns the directory and base name joined by '/', without extension.

    When the directory part is empty the result starts with '/'.
    """
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
936
937
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of the given category should be reported.

  An error is dropped when a NOLINT suppression applies to it, when its
  confidence is below the module's verbosity level, or when the output
  filters exclude its category.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so the last one whose prefix matches the
  # category decides the outcome.
  hidden = False
  for rule in _Filters():
    prefix_matches = category.startswith(rule[1:])
    if rule.startswith('-'):
      if prefix_matches:
        hidden = True
    elif rule.startswith('+'):
      if prefix_matches:
        hidden = False
    else:
      assert False  # should have been checked for in SetFilter.
  return not hidden
963
964
def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  Besides the location, the confidence in the error is logged: how certain
  we are that this is a legitimate style regression rather than a
  misidentification or a use that's sometimes justified.

  Errors rejected by _ShouldPrintError (NOLINT suppression, verbosity
  level, or filters) are neither counted nor printed.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  # Pick the line layout for the configured output format; anything other
  # than 'vs7' or 'eclipse' gets the default layout.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(
      template % (filename, linenum, message, category, confidence))
998
999
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches double-quoted strings. Escape codes should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals. Escape codes should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it
# also decides which surrounding whitespace is removed together with the
# comment, so that comments inside statements are handled better.
# The current rule is: whitespace on both sides is removed only when the
# comment ends the line. Otherwise whitespace to the right of the comment is
# removed; failing that, whitespace to the left is removed, but only when a
# non-word character follows the comment.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
1020
1021
def IsCppString(line):
  """Tells whether text appended to line would land inside a string constant.

  Neither single-line nor multi-line comments are taken into account.

  Args:
    line: A partial line of code, starting at column 0.

  Returns:
    True if the next character appended to 'line' would be inside a
    string constant.
  """
  # Neutralize escaped backslashes first, so that \\" is not mistaken for \".
  line = line.replace(r'\\', 'XX')
  all_quotes = line.count('"')
  escaped_quotes = line.count(r'\"')
  char_literal_quotes = line.count("'\"'")
  # An odd number of remaining quotes means a string is still open.
  return (all_quotes - escaped_quotes - char_literal_quotes) % 2 == 1
1037
1038
def CleanseRawStrings(raw_lines):
  """Replaces C++11 raw string literals with empty string literals.

  Before:
    static const char kData[] = R"(
        multi-line string
        )";

  After:
    static const char kData[] = ""
        (replaced by blank line)
        "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    A new list, as long as raw_lines, in which every raw string is
    replaced by "" and the inner lines of a multi-line raw string are empty.
  """
  cleansed = []
  # While inside a multi-line raw string this holds its terminator, i.e.
  # ')' + delimiter + '"'; otherwise it is None.
  terminator = None
  for line in raw_lines:
    if terminator:
      close_at = line.find(terminator)
      if close_at < 0:
        # Still inside the raw string: blank the line out.
        line = ''
      else:
        # The raw string ends here: keep this line's indentation, put a ""
        # in place of the string's tail and keep whatever follows it.
        indent = Match(r'^(\s*)\S', line).group(1)
        line = indent + '""' + line[close_at + len(terminator):]
        terminator = None
    else:
      # Look for the beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      opening = Match(
          r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if opening:
        before, delimiter, after = opening.groups()
        terminator = ')' + delimiter + '"'
        close_at = after.find(terminator)
        if close_at < 0:
          # Start of a multi-line raw string.
          line = before + '""'
        else:
          # The raw string ended on the same line.
          line = before + '""' + after[close_at + len(terminator):]
          terminator = None

    cleansed.append(line)

  # TODO(unknown): if terminator is not None here, we might want to
  # emit a warning for unterminated string.
  return cleansed
1098
1099
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment continuing past that line.

  Args:
    lines: The list of lines to search.
    lineix: The index of the first line to examine.

  Returns:
    The index of the first line at or after lineix that, ignoring
    surrounding whitespace, starts with '/*' and has no '*/' after it;
    len(lines) if there is no such line.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # A comment that is also closed on this line is not a multi-line one.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
1109
1110
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The list of lines to search.
    lineix: The index of the first line to examine.

  Returns:
    The index of the first line at or after lineix that, ignoring
    surrounding whitespace, ends with '*/'; len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1118
1119
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with placeholder comments.

  Args:
    lines: The list of lines to modify.
    begin: The index of the first line to overwrite.
    end: The index just past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  lineix = begin
  while lineix < end:
    lines[lineix] = '// dummy'
    lineix += 1
1126
1127
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: The list of lines to modify.
    error: The function to call when a comment is never closed.
  """
  search_from = 0
  while search_from < len(lines):
    first = FindNextMultiLineCommentStart(lines, search_from)
    if first >= len(lines):
      return
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      # Reported with a 1-based line number; nothing is removed.
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    search_from = last + 1
    RemoveMultiLineCommentsFromRange(lines, first, search_from)
1142
1143
def CleanseComments(line):
  """Strips the //-comment and single-line /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  comment_at = line.find('//')
  if comment_at >= 0:
    code = line[:comment_at]
    # A '//' that sits inside a string constant does not start a comment.
    if not IsCppString(code):
      line = code.rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1158
1159
class CleansedLines(object):
  """Holds copies of all lines with different preprocessing applied to them.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member contains the raw lines with C++11 raw
     strings replaced by empty strings.
  All these members are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      self.elided.append(CleanseComments(self._CollapseStrings(text)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    Lines matched by _RE_PATTERN_INCLUDE are returned unchanged.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Escaped characters go first, which makes quote/single quote collapsing
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    substitutions = (
        (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
        (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
        (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""'),
    )
    for pattern, replacement in substitutions:
      elided = pattern.sub(replacement, elided)
    return elided
1205
1206
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than the Python-2-only xrange(), matching the other
  # loops in this file and keeping the helper usable on Python 3.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
erg@google.comd350fe52013-01-14 17:51:48 +00001229
1230
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [ or <, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[<':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}', '<': '>'}[startchar]

  # Scan the first line from pos and every following line from its start,
  # carrying the nesting depth from one line to the next.
  scan_from = pos
  num_open = 0
  while True:
    end_pos, num_open = FindEndOfExpressionInLine(
        line, scan_from, num_open, startchar, endchar)
    if end_pos > -1:
      return (line, linenum, end_pos)
    if linenum >= clean_lines.NumLines() - 1:
      break
    linenum += 1
    line = clean_lines.elided[linenum]
    scan_from = 0

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001275
erg@google.com2aa59982013-10-28 19:09:25 +00001276
def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
  """Find position at the matching startchar.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    depth: nesting level at endpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching startchar: (index at matching startchar, 0)
    Otherwise: (-1, new depth at beginning of this line)
  """
  # range() rather than the Python-2-only xrange(), matching the other
  # loops in this file and keeping the helper usable on Python 3.
  for i in range(endpos, -1, -1):
    if line[i] == endchar:
      depth += 1
    elif line[i] == startchar:
      depth -= 1
      if depth == 0:
        return (i, 0)
  return (-1, depth)
1302
1303
def ReverseCloseExpression(clean_lines, linenum, pos):
  """If input points to ) or } or ] or >, finds the position that opens it.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  endchar = line[pos]
  if endchar not in ')}]>':
    return (line, 0, -1)
  startchar = {')': '(', ']': '[', '}': '{', '>': '<'}[endchar]

  # Scan the last line backward from pos and every earlier line backward
  # from its end, carrying the nesting depth from one line to the next.
  scan_from = pos
  num_open = 0
  while True:
    start_pos, num_open = FindStartOfExpressionInLine(
        line, scan_from, num_open, startchar, endchar)
    if start_pos > -1:
      return (line, linenum, start_pos)
    if linenum <= 0:
      break
    linenum -= 1
    line = clean_lines.elided[linenum]
    scan_from = len(line) - 1

  # Did not find startchar before beginning of file, give up
  return (line, 0, -1)
1347
1348
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file; the
      entry at index 0 is a dummy line and is not examined.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front, hence the slice starting at index 1.
  # (A slice plus any() replaces the Python-2-only xrange() index loop.)
  if not any(re.search(r'Copyright', line, re.I) for line in lines[1:11]):
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found. '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
1360
1361
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # _root is a directory name, not a regular expression, so it is escaped
    # before being spliced into the pattern.  The separator is always '/'
    # because FileInfo.FullName() converts backslashes to forward slashes;
    # a backslash os.sep would additionally make the pattern invalid.
    file_path_from_root = re.sub('^' + re.escape(_root) + '/', '',
                                 file_path_from_root)
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com4e00b9a2009-01-12 23:05:11 +00001384
1385
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a correctly named header guard.

  Logs an error if no #ifndef/#define header guard is present, if the two
  do not agree, or if the guard or the closing #endif comment does not use
  the variable derived from the file's path.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = define = endif = None
  ifndef_linenum = endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[:2]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef = argument
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = argument
    # The last #endif wins; the entire line is kept for the check below.
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; that variant is reported at level 0.
  if ifndef != cppvar:
    error_level = 0 if ifndef == cppvar + '_' else 5
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  expected_endif = '#endif // %s' % cppvar
  if endif != expected_endif:
    # Same leniency as above for the variant with one more underscore.
    error_level = 0 if endif == expected_endif + '_' else 5
    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1459
1460
def CheckForBadCharacters(filename, lines, error):
  """Logs an error for each line containing bad characters.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (character, category, message), checked in this order on every line.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for character, category, message in bad_characters:
      if character in line:
        error(filename, linenum, category, 5, message)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001484
1485
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1502
1503
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  A /* ... */ comment that stays on one line is fine; one that is still
  open at the end of the line is reported, since // comments are preferred
  for anything longer.  Strings may legally span lines when each line ends
  in a backslash, but that is ugly and unnecessary.  This lint program
  does not cope well with either construct, so both are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Remove all \\ (escaped backslashes) from the line.  They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  comment_left_open = line.count('/*') > line.count('*/')
  if comment_left_open:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes means a string is still open.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. '
          'Use C++11 raw strings or concatenation instead.')
erg@google.com4e00b9a2009-01-12 23:05:11 +00001540
1541
# Pairs of (thread-unsafe call, suggested replacement).  Each entry is a
# function name followed by its opening parenthesis.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1556
1557
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code was originally written without multi-threading in mind, and
  many engineers learned posix before its threading extensions were added.
  These tests point them to the thread-safe variant of each function
  listed in threading_list (when using posix directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_call, safe_call in threading_list:
    ix = line.find(unsafe_call)
    if ix < 0:
      continue
    if ix > 0:
      # Skip matches that are the tail of a longer identifier or a member
      # access (preceded by '.' or '>').
      preceding = line[ix - 1]
      if preceding.isalnum() or preceding in ('_', '.', '>'):
        continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_call +
          '...) instead of ' + unsafe_call +
          '...) for improved thread safety.')
1583
1584
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Flags VLOG() calls that are given a symbolic severity.

  VLOG() takes a numeric verbosity level, so VLOG(2) is accepted while
  VLOG(INFO), VLOG(ERROR), VLOG(WARNING), VLOG(DFATAL) and VLOG(FATAL)
  are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
                          clean_lines.elided[linenum])
  if not symbolic_level:
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level. '
        'Use LOG() if you want symbolic severity levels.')
1602
1603
# A statement consisting solely of "*name++;" or "*name--;".  Such a
# statement steps the pointer rather than the value it points at.
_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements of the form *count++ (or *count--).

  Given
    void increment_counter(int* count) {
      *count++;
    }
  the statement is effectively count++: it moves the pointer and discards
  the dereferenced value.  The intended spelling is ++*count, (*count)++
  or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1630
1631
class _BlockInfo(object):
  """Bookkeeping for one generic brace-delimited block of code."""

  def __init__(self, seen_open_brace):
    # Parsing state for this block: no inline assembly, no unbalanced
    # parentheses seen yet.
    self.inline_asm = _NO_ASM
    self.open_parentheses = 0
    # Whether the block's opening "{" has already been consumed.
    self.seen_open_brace = seen_open_brace

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that precedes the opening brace.

    Mostly useful for the text between a class identifier and its "{",
    which is usually where base classes are listed.  A generic block has
    nothing to verify there, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    pass

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that follows the closing brace.

    Mostly useful for end-of-namespace comments.  A generic block has
    nothing to verify there, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    pass
1667
1668
class _ClassInfo(_BlockInfo):
  """Bookkeeping for one class or struct declaration."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False

    # Members of a struct start out public, members of a class private.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Record how far the declaration itself is indented.  raw_lines is used
    # rather than elided so that leading comments are accounted for.
    leading = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    self.class_indent = len(leading.group(1)) if leading else 0

    # Locate the end of the class by balancing braces.  Constructs such as
    #   class A {
    #   } *x = { ...
    # will fool this, but it is good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_balance = 0
    for candidate in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[candidate]
      brace_balance += text.count('{') - text.count('}')
      if brace_balance == 0:
        self.last_line = candidate
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    # A lone ':' (one that is not part of '::') in the class head marks the
    # class as derived.
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    # The closing brace must line up with the start of the class.  Only
    # braces preceded by nothing but spaces are considered, which means
    # single-line class definitions are never checked.
    closing = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not closing or len(closing.group(1)) == self.class_indent:
      return

    kind = 'struct' if self.is_struct else 'class'
    parent = kind + ' ' + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1723
erg@google.com4e00b9a2009-01-12 23:05:11 +00001724
class _NamespaceInfo(_BlockInfo):
  """Bookkeeping for one namespace block."""

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, False)
    # Anonymous namespaces are stored with an empty name.
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Verifies the comment that terminates the namespace."""
    closing_line = clean_lines.raw_lines[linenum]

    # Short namespaces are not required to carry a terminating comment, so
    # stay quiet for them -- unless a namespace comment is already present,
    # in which case it is still validated below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b', closing_line):
      return

    # Decide what the terminating comment has to look like.
    #
    # C style "/* */" comments are accepted as well, so that code which
    # terminates namespaces inside preprocessor macros can be cpplint clean.
    # Forms like "// end of namespace <name>." with a trailing period are
    # accepted too.  Nothing else is, because otherwise an existing comment
    # that is merely a substring of the expected namespace would produce
    # false negatives.
    if self.name:
      # Named namespace
      expected = (r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                  r'[\*/\.\\\s]*$')
      message = ('Namespace should be terminated with "// namespace %s"' %
                 self.name)
    else:
      # Anonymous namespace
      expected = r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$'
      message = 'Namespace should be terminated with "// namespace"'

    if not Match(expected, closing_line):
      error(filename, linenum, 'readability/namespace', 5, message)
1777
1778
class _PreprocessorInfo(object):
  """Checkpoints of the nesting stack taken around one #if/#else group."""

  def __init__(self, stack_before_if):
    # Set once the first #else or #elif of the group has been seen.
    self.seen_else = False

    # Snapshot of the whole nesting stack as it was just before the #if.
    self.stack_before_if = stack_before_if

    # Snapshot of the whole nesting stack as it was on reaching #else.
    self.stack_before_else = []
1791
1792
class _NestingState(object):
  """Holds states related to parsing braces."""

  def __init__(self):
    # Stack for tracking all braces.  An object is pushed whenever we
    # see a "{", and popped when we see a "}".  Only 3 types of
    # objects are possible:
    # - _ClassInfo: a class or struct.
    # - _NamespaceInfo: a namespace.
    # - _BlockInfo: some other type of block.
    self.stack = []

    # Stack of _PreprocessorInfo objects.
    self.pp_stack = []

  def SeenOpenBrace(self):
    """Check if we have seen the opening brace for the innermost block.

    Returns:
      True if we have seen the opening brace, False if the innermost
      block is still expecting an opening brace.
    """
    return (not self.stack) or self.stack[-1].seen_open_brace

  def InNamespaceBody(self):
    """Check if we are currently one level inside a namespace body.

    Returns:
      True if top of the stack is a namespace block, False otherwise
      (including when the stack is empty).
    """
    # bool() so that an empty stack yields False rather than the empty
    # list itself.
    return bool(self.stack) and isinstance(self.stack[-1], _NamespaceInfo)

  def UpdatePreprocessor(self, line):
    """Update preprocessor stack.

    We need to handle preprocessors due to classes like this:
      #ifdef SWIG
      struct ResultDetailsPageElementExtensionPoint {
      #else
      struct ResultDetailsPageElementExtensionPoint : public Extension {
      #endif

    We make the following assumptions (good enough for most files):
    - Preprocessor condition evaluates to true from #if up to first
      #else/#elif/#endif.

    - Preprocessor condition evaluates to false from #else/#elif up
      to #endif.  We still perform lint checks on these lines, but
      these do not affect nesting stack.

    Args:
      line: current line to check.
    """
    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
      # Beginning of #if block, save the nesting stack here.  The saved
      # stack will allow us to restore the parsing state in the #else case.
      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
    elif Match(r'^\s*#\s*(else|elif)\b', line):
      # Beginning of #else block
      if self.pp_stack:
        if not self.pp_stack[-1].seen_else:
          # This is the first #else or #elif block.  Remember the
          # whole nesting stack up to this point.  This is what we
          # keep after the #endif.
          self.pp_stack[-1].seen_else = True
          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)

        # Restore the stack to how it was before the #if
        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
      else:
        # TODO(unknown): unexpected #else, issue warning?
        pass
    elif Match(r'^\s*#\s*endif\b', line):
      # End of #if or #else blocks.
      if self.pp_stack:
        # If we saw an #else, we will need to restore the nesting
        # stack to its former state before the #else, otherwise we
        # will just continue from where we left off.
        if self.pp_stack[-1].seen_else:
          # Here we can just use a shallow copy since we are the last
          # reference to it.
          self.stack = self.pp_stack[-1].stack_before_else
        # Drop the corresponding #if
        self.pp_stack.pop()
      else:
        # TODO(unknown): unexpected #endif, issue warning?
        pass

  def Update(self, filename, clean_lines, linenum, error):
    """Update nesting state with current line.

    The line is consumed left to right: preprocessor directives first, then
    parenthesis/inline-assembly tracking for the innermost block, then
    namespace heads, a class head, access specifiers, and finally each
    remaining brace, semicolon or closing parenthesis.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[linenum]

    # Update pp_stack first
    self.UpdatePreprocessor(line)

    # Count parentheses.  This is to avoid adding struct arguments to
    # the nesting stack.
    if self.stack:
      inner_block = self.stack[-1]
      depth_change = line.count('(') - line.count(')')
      inner_block.open_parentheses += depth_change

      # Also check if we are starting or ending an inline assembly block.
      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
        if (depth_change != 0 and
            inner_block.open_parentheses == 1 and
            _MATCH_ASM.match(line)):
          # Enter assembly block
          inner_block.inline_asm = _INSIDE_ASM
        else:
          # Not entering assembly block.  If previous line was _END_ASM,
          # we will now shift to _NO_ASM state.
          inner_block.inline_asm = _NO_ASM
      elif (inner_block.inline_asm == _INSIDE_ASM and
            inner_block.open_parentheses == 0):
        # Exit assembly block
        inner_block.inline_asm = _END_ASM

    # Consume namespace declaration at the beginning of the line.  Do
    # this in a loop so that we catch same line declarations like this:
    #   namespace proto2 { namespace bridge { class MessageSet; } }
    while True:
      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if it weren't followed by a whitespace, this
      # is so that we don't confuse our namespace checker.  The
      # missing spaces will be flagged by CheckSpacing.
      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
      if not namespace_decl_match:
        break

      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
      self.stack.append(new_namespace)

      line = namespace_decl_match.group(2)
      if line.find('{') != -1:
        new_namespace.seen_open_brace = True
        line = line[line.find('{') + 1:]

    # Look for a class declaration in whatever is left of the line
    # after parsing namespaces.  The regexp accounts for decorated classes
    # such as in:
    #   class LOCKABLE API Object {
    #   };
    #
    # Templates with class arguments may confuse the parser, for example:
    #   template <class T
    #             class Comparator = less<T>,
    #             class Vector = vector<T> >
    #   class HeapQueue {
    #
    # Because this parser has no nesting state about templates, by the
    # time it saw "class Comparator", it may think that it's a new class.
    # Nested templates have a similar problem:
    #   template <
    #       typename ExportedType,
    #       typename TupleType,
    #       template <typename, typename> class ImplTemplate>
    #
    # To avoid these cases, we ignore classes that are followed by '=' or '>'
    class_decl_match = Match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
        r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
        r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
    if (class_decl_match and
        (not self.stack or self.stack[-1].open_parentheses == 0)):
      self.stack.append(_ClassInfo(
          class_decl_match.group(4), class_decl_match.group(2),
          clean_lines, linenum))
      line = class_decl_match.group(5)

    # If we have not yet seen the opening brace for the innermost block,
    # run checks here.
    if not self.SeenOpenBrace():
      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

    # Update access control if we are inside a class/struct
    if self.stack and isinstance(self.stack[-1], _ClassInfo):
      classinfo = self.stack[-1]
      access_match = Match(
          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
          r':(?:[^:]|$)',
          line)
      if access_match:
        classinfo.access = access_match.group(2)

        # Check that access keywords are indented +1 space.  Skip this
        # check if the keywords are not preceded by whitespaces.
        indent = access_match.group(1)
        if (len(indent) != classinfo.class_indent + 1 and
            Match(r'^\s*$', indent)):
          if classinfo.is_struct:
            parent = 'struct ' + classinfo.name
          else:
            parent = 'class ' + classinfo.name
          slots = ''
          if access_match.group(3):
            slots = access_match.group(3)
          error(filename, linenum, 'whitespace/indent', 3,
                '%s%s: should be indented +1 space inside %s' % (
                    access_match.group(2), slots, parent))

    # Consume braces or semicolons from what's left of the line
    while True:
      # Match first brace, semicolon, or closed parenthesis.
      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
      if not matched:
        break

      token = matched.group(1)
      if token == '{':
        # If namespace or class hasn't seen a opening brace yet, mark
        # namespace/class head as complete.  Push a new block onto the
        # stack otherwise.
        if not self.SeenOpenBrace():
          self.stack[-1].seen_open_brace = True
        else:
          self.stack.append(_BlockInfo(True))
          if _MATCH_ASM.match(line):
            self.stack[-1].inline_asm = _BLOCK_ASM
      elif token == ';' or token == ')':
        # If we haven't seen an opening brace yet, but we already saw
        # a semicolon, this is probably a forward declaration.  Pop
        # the stack for these.
        #
        # Similarly, if we haven't seen an opening brace yet, but we
        # already saw a closing parenthesis, then these are probably
        # function arguments with extra "class" or "struct" keywords.
        # Also pop these stack for these.
        if not self.SeenOpenBrace():
          self.stack.pop()
      else:  # token == '}'
        # Perform end of block checks and pop the stack.
        if self.stack:
          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
          self.stack.pop()
      line = matched.group(2)

  def InnermostClass(self):
    """Get class info on the top of the stack.

    Returns:
      A _ClassInfo object if we are inside a class, or None otherwise.
    """
    # Walk from the innermost block outwards.
    for block in reversed(self.stack):
      if isinstance(block, _ClassInfo):
        return block
    return None

  def CheckCompletedBlocks(self, filename, error):
    """Checks that all classes and namespaces have been completely parsed.

    Call this when all lines in a file have been processed.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    for obj in self.stack:
      if isinstance(obj, _ClassInfo):
        error(filename, obj.starting_linenum, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              obj.name)
      elif isinstance(obj, _NamespaceInfo):
        error(filename, obj.starting_linenum, 'build/namespaces', 5,
              'Failed to find complete declaration of namespace %s' %
              obj.name)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002069
2070
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Reports constructs that gcc-2 tolerated but standard C++ does not.

  Warning about these in lint is one way to ease the transition to new
  compilers.  The constructs reported are:
  - a storage class that is not first (want "static const", not
    "const static").
  - "%qd" in printf-type functions (want "%lld").
  - "%1$d" style positional formats in printf-type functions.
  - "\%" and similar undefined character escape sequences.
  - uncommented text after #endif.
  - inner-style forward declarations.
  - the >? and <? operators, and their >?= and <?= cousins.

  Since it is convenient to do while already here, const string& members
  and single-argument constructors that are not marked explicit are
  reported as well.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported; it is called with the
           filename, line number, category, confidence level and message.
  """

  # First pass: comments removed, string literals still in place.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Escaped backslashes would be mistaken for the start of an escape, so
  # drop them before looking for undefined escapes.
  without_double_backslash = with_strings.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslash):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Second pass: both comments and strings removed.
  line = clean_lines.elided[linenum]

  # Each entry: (matcher, pattern, category, confidence, message).  The
  # entries are evaluated in order.
  #
  # TODO(unknown): Could the const string& check be expanded safely to
  # arbitrary references, without triggering too many false positives? The
  # first attempt triggered 5 warnings for mostly benign code in the regtest,
  # hence the restriction.
  # Here's the original regexp, for the reference:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  elided_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard. Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid. Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
      )
  for matcher, pattern, category, confidence, message in elided_checks:
    if matcher(pattern, line):
      error(filename, linenum, category, confidence, message)

  # Everything below applies to class declarations only.  Stop here when
  # there is no enclosing class, or when its head is not complete yet.
  classinfo = nesting_state.InnermostClass()
  if not classinfo or not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]
  escaped_classname = re.escape(base_classname)

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  constructor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
                      % escaped_classname,
                      line)
  if not constructor:
    return

  sole_argument = constructor.group(1)
  if sole_argument == 'void':
    return

  # An argument that is a reference to the class itself is not reported.
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
           % escaped_classname, sole_argument.strip()):
    return

  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2176
erg@google.com4e00b9a2009-01-12 23:05:11 +00002177
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often sit inside if/for/while/switch conditions, which
  # follow their own, more liberal conventions.  When the line holds such a
  # construct, only the text inside its parentheses is examined, so that
  # the stricter function call rules can be applied there.  Otherwise the
  # whole line is examined.
  control_flow_patterns = (r'\bif\s*\((.*)\)\s*{',
                           r'\bfor\s*\((.*)\)\s*{',
                           r'\bwhile\s*\((.*)\)\s*[{;]',
                           r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_flow_pattern in control_flow_patterns:
    condition = Search(control_flow_pattern, line)
    if condition:
      fncall = condition.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.
  exempt = (
      # Control structures.
      Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
             fncall) or
      # Pointers/references to functions.
      Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) or
      # Pointers/references to arrays.
      Search(r' \([^)]+\)\[[^\]]+\]', fncall))
  if exempt:
    return

  # Space right after an opening parenthesis.
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Space between a name and its opening parenthesis.
  space_before_paren = Search(r'\w\s+\(', fncall)
  if (space_before_paren and
      not Search(r'#\s*define|typedef', fncall) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # If the closing parenthesis is preceded by only whitespaces,
    # try to give a more descriptive error message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002244
2245
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or is made up entirely of
  whitespace.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2259
2260
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  The function is called once per line.  Depending on the line it either
  starts tracking a new function (function_state.Begin), finishes the
  current one (function_state.Check followed by function_state.End), or
  counts the line towards the current body (function_state.Count).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if (function_name in ('TEST', 'TEST_F') or
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the candidate header until we can tell whether it
    # is a declaration / trivial function or the start of a real body.
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # For test macros the argument list is part of the reported name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2329
2330
# Matches a "// TODO" comment.  Group 1 is the whitespace between "//" and
# "TODO", group 2 the optional "(username)" part, and group 3 the optional
# whitespace (or end of string) that follows the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Comments that do not start with a TODO marker are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  spaces_before, owner, spaces_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # The group is None when the TODO is immediately followed by text, which
  # must be reported, so compare against the two accepted values explicitly.
  if spaces_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2362
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Reports a DISALLOW* macro that starts a line inside a class whose current
  access section is not 'private'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return

  stack = nesting_state.stack
  if not (stack and isinstance(stack[-1], _ClassInfo)):
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration.  We could issue a warning here, but it
    # probably resulted in a compiler error already.
    return

  if stack[-1].access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % matched.group(1))
2392
2393
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Find the corresponding > to close a template.

  Scans forward, first through init_suffix and then through the following
  elided lines, looking at the characters < > ( ) , ; [ ] only.  All other
  operators are deliberately ignored: terminating early on something like
  "a<b+c" would cause false positives for default arguments and other
  template expressions.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if the < looks like an opening template bracket: a matching > was
    found, a comma was seen while still expecting the >, or the input ran
    out before a decision could be made.  False if some other operator
    (closing paren/bracket or semicolon) was seen while expecting the >.
  """
  openers = ('<', '(', '[')
  pending = ['<']
  text = init_suffix
  while True:
    for char in text:
      if char not in '<>(),;[]':
        continue

      if pending[-1] == '<':
        # Expecting closing angle bracket
        if char in openers:
          pending.append(char)
        elif char == '>':
          pending.pop()
          if not pending:
            # Found matching angle bracket
            return True
        elif char == ',':
          # Got a comma after a bracket, this is most likely a template
          # argument.  We have not seen a closing angle bracket yet, but
          # it's probably a few lines later if we look for it, so just
          # return early here.
          return True
        else:
          # Got some other operator.
          return False
      elif char in openers:
        # Expecting closing parenthesis or closing bracket
        pending.append(char)
      elif char in (')', ']'):
        # We don't bother checking for matching () or [].  If we got
        # something like (] or [), it would have been a syntax error.
        pending.pop()

    # Nothing decisive on this line, scan the next one.
    linenum += 1
    if linenum >= len(clean_lines.elided):
      break
    text = clean_lines.elided[linenum]

  # Exhausted all remaining lines and still no matching angle bracket.
  # Most likely the input was incomplete, otherwise we should have
  # seen a semicolon and returned early.
  return True
2461
2462
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Find the corresponding < that started a template.

  Scans backward, first through init_prefix (right to left) and then
  through the preceding elided lines, looking at the characters
  < > ( ) , ; [ ] only.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if a matching < was found, or a comma was seen while still
    expecting the <.  False if some other operator was seen while
    expecting the <, or if all earlier lines were exhausted.
  """
  closers = ('>', ')', ']')
  pending = ['>']
  text = init_prefix
  while True:
    for char in reversed(text):
      if char not in '<>(),;[]':
        continue

      if pending[-1] == '>':
        # Expecting opening angle bracket
        if char in closers:
          pending.append(char)
        elif char == '<':
          pending.pop()
          if not pending:
            # Found matching angle bracket
            return True
        elif char == ',':
          # Got a comma before a bracket, this is most likely a
          # template argument.  The opening angle bracket is probably
          # there if we look for it, so just return early here.
          return True
        else:
          # Got some other operator.
          return False
      elif char in closers:
        # Expecting opening parenthesis or opening bracket
        pending.append(char)
      elif char in ('(', '['):
        pending.pop()

    # Nothing decisive on this line, scan the previous one.
    linenum -= 1
    if linenum < 0:
      break
    text = clean_lines.elided[linenum]

  # Exhausted all earlier lines and still no matching angle bracket.
  return False
2518
2519
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  The blank-line and comment checks run on the line with comments kept
  (only C++11 raw strings removed); every later check runs on the elided
  line, i.e. with comments and strings removed.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The 5-character prefix must be exactly four spaces followed by the
        # colon that opens the initializer list.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and line[commentend] != ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or C++ style Doxygen comments placed after the variable:
        # ///<  Header comment
        # //!<  Header comment
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^!< ', line[commentend:]) or
                 Search(r'^/< ', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  The replacement is
  # a raw string so that the backslash is not an invalid string escape; the
  # resulting text is unchanged.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  # Also ignore using ns::operator<<;
  match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if (match and
      not (match.group(1).isdigit() and match.group(2).isdigit()) and
      not (match.group(1) == 'operator' and match.group(2) == ';')):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  #
  # This does not apply when the non-space character following the
  # comma is another comma, since the only time when that happens is
  # for empty macro arguments.
  #
  # We run this check in two passes: first pass on elided lines to
  # verify that lines contain missing whitespaces, second pass on raw
  # lines to confirm that those missing whitespaces are not due to
  # elided comments.
  if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  match = Match(r'^(.*[^ ({]){', line)
  if match:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<]".
    #
    # To account for nested initializer list, we allow any number of
    # closing braces up to "{;,)<".  We can't simply silence the
    # warning on first sight of closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    trailing_text = ''
    if endpos > -1:
      trailing_text = endline[endpos:]
    # Also look at up to two following lines for the character that comes
    # after the closing brace.
    for offset in xrange(endlinenum + 1,
                         min(endlinenum + 3, clean_lines.NumLines() - 1)):
      trailing_text += clean_lines.elided[offset]
    if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002866
2867
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is blank line before
  public/protected/private.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the size condition.
  first_line = class_info.starting_linenum
  if linenum <= first_line or class_info.last_line - first_line <= 24:
    return

  source = clean_lines.lines
  section = Match(r'\s*(public|protected|private):', source[linenum])
  if not section:
    return

  # A section keyword is fine when the line above it is blank, or contains
  # "class" or "struct".  The latter can happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # It is also fine when the line above ends with a backslash, as can be
  # common when defining classes in C macros.
  above = source[linenum - 1]
  if (IsBlankLine(above) or
      Search(r'\b(class|struct)\b', above) or
      Search(r'\\$', above)):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  head_end = first_line
  for candidate in range(first_line, linenum):
    if Search(r'\{\s*$', source[candidate]):
      head_end = candidate
      break

  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % section.group(1))
2921
2922
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.

  The search walks backwards through the elided lines, starting just above
  linenum.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  index = linenum - 1
  while index >= 0:
    candidate = clean_lines.elided[index]
    if IsBlankLine(candidate):
      index -= 1
      continue
    return (candidate, index)
  return ('', -1)
2944
2945
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  The following problems are reported on the elided form of the line:
    * an opening brace alone on its own line (whitespace/braces),
    * an "else" that starts a line right after a closing brace
      (whitespace/newline),
    * an else with a brace on only one side (readability/braces),
    * an else clause or do/while body on the same line as its keyword
      (whitespace/newline),
    * a redundant semicolon after a block body (readability/braces).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone is using
    # braces in a block to explicitly create a new scope, which is commonly used
    # to control the lifetime of stack-allocated variables. Braces are also
    # used for brace initializers inside function calls. We don't detect this
    # perfectly: we just don't complain if the last non-whitespace character on
    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
    # previous line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{(]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  # The \b keeps identifiers that merely start with "else" (for example
  # "elsewhere") from being mistaken for the keyword.
  if Match(r'\s*else\b', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  # Word boundaries again restrict the match to the keyword itself, so a
  # name such as "orelse" followed by "{" is not treated as an else.
  if (Search(r'}\s*else\b[^{]*$', line) or
      Match(r'[^}]*\belse\s*{', line)):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Block bodies should not be followed by a semicolon. Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist. These are the places where "};" should
  # be replaced by just "}":
  # 1. Some flavor of block following closing parenthesis:
  #    for (;;) {};
  #    while (...) {};
  #    switch (...) {};
  #    Function(...) {};
  #    if (...) {};
  #    if (...) else if (...) {};
  #
  # 2. else block:
  #    if (...) else {};
  #
  # 3. const member function:
  #    Function(...) const {};
  #
  # 4. Block following some statement:
  #    x = 42;
  #    {};
  #
  # 5. Block at the beginning of a function:
  #    Function(...) {
  #      {};
  #    }
  #
  #    Note that naively checking for the preceding "{" will also match
  #    braces inside multi-dimensional arrays, but this is fine since
  #    that expression will not contain semicolons.
  #
  # 6. Block following another block:
  #    while (true) {}
  #    {};
  #
  # 7. End of namespaces:
  #    namespace {};
  #
  #    These semicolons seems far more common than other kinds of
  #    redundant semicolons, possibly due to people converting classes
  #    to namespaces. For now we do not warn for this case.
  #
  # Try matching case 1 first.
  match = Match(r'^(.*\)\s*)\{', line)
  if match:
    # Matched closing parenthesis (case 1). Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro. This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement. This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals.
    safe_macros = (
        'TEST', 'TEST_F', 'TEST_P', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
        'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
        'LOCKS_EXCLUDED', 'INTERFACE_DEF')
    closing_paren_pos = match.group(1).rfind(')')
    opening_parenthesis = ReverseCloseExpression(
        clean_lines, linenum, closing_paren_pos)
    if opening_parenthesis[2] > -1:
      # Text that precedes the opening parenthesis on its own line.
      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
      macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
      if ((macro and macro.group(1) not in safe_macros) or
          Search(r'\s+=\s*$', line_prefix)):
        match = None

  else:
    # Try matching cases 2-3.
    match = Match(r'^(.*(?:\belse|\)\s*const)\s*)\{', line)
    if not match:
      # Try matching cases 4-6. These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        match = Match(r'^(\s*)\{', line)

  # Check matching closing brace
  if match:
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
      # Current {} pair is eligible for semicolon check, and we have found
      # the redundant semicolon, output warning here.
      #
      # Note: because we are scanning forward for opening braces, and
      # outputting warnings for the matching closing brace, if there are
      # nested blocks with trailing semicolons, we will get the error
      # messages in reversed order.
      error(filename, endlinenum, 'readability/braces', 4,
            "You don't need a ; after a }")
erg@google.com4e00b9a2009-01-12 23:05:11 +00003118
3119
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flags a loop or conditional whose entire body is a lone semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Only a keyword at the very start of the line (after whitespace) counts.
  # That also skips most do-while loops, whose "while" line should begin
  # with a closing brace. "if" is included because an empty conditional
  # block is likely an error.
  keyword = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword:
    return

  # Locate the end of the parenthesized condition.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Warn only when a semicolon follows the condition immediately.
  # Whitespace or a newline before the semicolon is left to the separate
  # check for semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003153
3154
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests typed CHECK/EXPECT variants for simple comparisons.

  When a macro from _CHECK_MACROS wraps a single relational comparison in
  which at least one operand is a literal constant, the matching entry of
  _CHECK_REPLACEMENT is recommended through a readability/check error.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  lines = clean_lines.elided
  current = lines[linenum]

  # Decide the set of replacement macros that should be suggested.
  check_macro = None
  start_pos = -1
  for candidate in _CHECK_MACROS:
    if candidate not in current:
      continue
    # Locate the opening parenthesis with a regular expression so that we
    # match the expected macro itself, rather than some other macro whose
    # name merely contains it as a substring.
    head = Match(r'^(.*\b' + candidate + r'\s*)\(', current)
    if head:
      check_macro = candidate
      start_pos = len(head.group(1))
      break
  if not check_macro:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  # Find end of the boolean expression by matching parentheses.
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return
  if linenum == end_line:
    expression = current[start_pos + 1:end_pos - 1]
  else:
    # The expression spans several lines; stitch the pieces together.
    pieces = [current[start_pos + 1:]]
    pieces.extend(lines[linenum + 1:end_line])
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    op_match = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                     r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not op_match:
      # Unparenthesized operand. Rather than growing lhs one character at
      # a time, first try to swallow a whole run of non-operator
      # characters, and fall back to a single non-space character.
      operand = (Match(r'^([^-=!<>()&|]+)(.*)$', expression) or
                 Match(r'^(\s*\S)(.*)$', expression))
      if not operand:
        break
      lhs += operand.group(1)
      expression = operand.group(2)
      continue

    token = op_match.group(1)
    remainder = op_match.group(2)
    if token == '(':
      # Parenthesized operand: copy it through to its closing parenthesis.
      (end, _) = FindEndOfExpressionInLine(remainder, 0, 1, '(', ')')
      if end < 0:
        return  # Unmatched parenthesis
      lhs += '(' + remainder[0:end]
      expression = remainder[end:]
    elif token in ('&&', '||'):
      # Logical and/or operators. This means the expression
      # contains more than one term, for example:
      #   CHECK(42 < a && a < b);
      #
      # These are not replaceable with CHECK_LE, so bail out early.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Non-relational operator: part of the left operand.
      lhs += token
      expression = remainder
    else:
      # Relational operator: everything after it is the right operand.
      operator = token
      rhs = remainder
      break

  # Only apply checks if we got all parts of the boolean expression.
  if not (lhs and operator and rhs):
    return

  # Check that rhs do not contain logical operators. We already know
  # that lhs is fine since the loop above parses out && and ||.
  if '&&' in rhs or '||' in rhs:
    return

  # At least one of the operands must be a constant literal. This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if not (Match(match_constant, lhs.strip()) or
          Match(match_constant, rhs.strip())):
    return

  # Note: since we know both lhs and rhs, we can provide a more
  # descriptive error message like:
  #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
  # Instead of:
  #   Consider using CHECK_EQ instead of CHECK(a == b)
  #
  # We are still keeping the less descriptive message because if lhs
  # or rhs gets long, the error message might become unreadable.
  error(filename, linenum, 'readability/check', 2,
        'Consider using %s instead of %s(a %s b)' % (
            _CHECK_REPLACEMENT[check_macro][operator],
            check_macro, operator))
erg@google.com4e00b9a2009-01-12 23:05:11 +00003280
3281
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator keywords found on a line.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN produces a
  readability/alt_tokens error naming the replacement recorded in
  _ALT_TOKEN_REPLACEMENT.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  is_preprocessor = Match(r'^\s*#', line)

  # Last ditch effort to avoid multi-line comments. This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives. At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  touches_block_comment = '/*' in line or '*/' in line

  if is_preprocessor or touches_block_comment:
    return

  for found in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = found.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3312
3313
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Non-Unicode input (a
    Python 2 str or a Python 3 bytes object) is measured with len().
  """
  # The "unicode" builtin exists only on Python 2. Referring to it
  # unconditionally raises NameError on Python 3, where str is already the
  # Unicode text type, so resolve the right type at call time.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that composable sequences count as one character.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining marks that remain after normalization take no column.
      width += 1
  return width
3334
3335
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can. In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  line = clean_lines.lines_without_raw_strings[linenum]
  cleansed_line = clean_lines.elided[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests. Mine aren't
  # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = len(line) - len(line.lstrip(' '))
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section labels
  elif (initial_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # NOTE(review): guards are commonly written "#endif  // NAME" with two
    # spaces; confirm the spacing of the last template against the real file.
    guard_starts = tuple(
        template % cppvar
        for template in ('#ifndef %s', '#define %s', '#endif // %s'))
    if line.startswith(guard_starts):
      is_header_guard = True

  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too. It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  length_exempt = (line.startswith('#include') or
                   is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line) or
                   Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # for loops are allowed two ;'s (and may run over two lines).
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = 'for' in previous and ';' not in previous
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003441
3442
# Matches a quoted #include of a ".h" file whose name contains no '/',
# i.e. a header named without any directory component.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter (< or ")
# and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3451
3452
3453def _DropCommonSuffixes(filename):
3454 """Drops common suffixes like _test.cc or -inl.h from filename.
3455
3456 For example:
3457 >>> _DropCommonSuffixes('foo/foo-inl.h')
3458 'foo/foo'
3459 >>> _DropCommonSuffixes('foo/bar/foo.cc')
3460 'foo/bar/foo'
3461 >>> _DropCommonSuffixes('foo/foo_internal.h')
3462 'foo/foo'
3463 >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
3464 'foo/foo_unusualinternal'
3465
3466 Args:
3467 filename: The input filename.
3468
3469 Returns:
3470 The filename with the common suffix removed.
3471 """
3472 for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
3473 'inl.h', 'impl.h', 'internal.h'):
3474 if (filename.endswith(suffix) and len(filename) > len(suffix) and
3475 filename[-len(suffix) - 1] in ('-', '_')):
3476 return filename[:-len(suffix) - 1]
3477 return os.path.splitext(filename)[0]
3478
3479
3480def _IsTestFilename(filename):
3481 """Determines if the given filename has a suffix that identifies it as a test.
3482
3483 Args:
3484 filename: The input filename.
3485
3486 Returns:
3487 True if 'filename' looks like a test, False otherwise.
3488 """
3489 if (filename.endswith('_test.cc') or
3490 filename.endswith('_unittest.cc') or
3491 filename.endswith('_regtest.cc')):
3492 return True
3493 else:
3494 return False
3495
3496
def _ClassifyInclude(fileinfo, include, is_system):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  if is_system:
    # Angle-bracket includes listed in _CPP_HEADERS are C++ system headers;
    # every other angle-bracket include is treated as a C system header.
    if include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  target_stem = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_stem)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_first = _RE_FIRST_COMPONENT.match(target_base)
  include_first = _RE_FIRST_COMPONENT.match(include_base)
  if not (target_first and include_first):
    return _OTHER_HEADER
  if target_first.group(0) == include_first.group(0):
    return _POSSIBLE_MY_HEADER
  return _OTHER_HEADER
3554
3555
erg@google.coma87abb82009-02-24 01:41:01 +00003556
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below applies only to lines that really are #include lines.
  # The pattern is anchored at the start of the line, so one lookup serves
  # both the ordering checks and the stream check.
  found = _RE_PATTERN_INCLUDE.search(line)
  if not found:
    return
  include = found.group(2)
  is_system = (found.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    header_kind = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(header_kind)
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    in_order = include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include)
    if not in_order:
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so we exempt them.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3627
erg@google.com8a95ecc2011-09-08 00:45:54 +00003628
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the
  text following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol.  This properly
  handles nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match a string having an open punctuation symbol at
  the end.

  Args:
    text: The lines to extract text.  Its comments and strings must be elided.
          It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.

  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Maps each opening punctuation to its matching close-punctuation.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # .values() (rather than the Python-2-only .itervalues()) keeps this
  # runnable on both Python 2 and Python 3.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')

  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    char = text[position]
    if char == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif char in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif char in matching_punctuation:
      punctuation_stack.append(matching_punctuation[char])
    position += 1

  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None

  # Punctuations match; position is one past the final closing symbol.
  return text[start_position:position - 1]
3683
3684
# Regular-expression building blocks for recognizing call-by-reference
# parameters in declarations.
#
# The type pattern supports nested templates up to 2 levels deep by means of
# this messy fragment:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = ''.join((
    # Optional leading 'const' and elaborated-type keyword.
    r'(?:const\s+)?',
    r'(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?',
    # One or more of: word character, template argument list, or '::'.
    r'(?:\w|\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|::)+',
))
# A call-by-reference parameter ends with '& identifier'; it may be followed
# by a default value and is terminated by ',' or ')'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(%s(?:\s*(?:\bconst\b|[*]))*\s*&\s*%s)\s*(?:=[^,()]+)?[,)]'
    % (_RE_PATTERN_TYPE, _RE_PATTERN_IDENT))
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*%(ident)s|const\s+%(type)s\s*&\s*%(ident)s)'
    % {'ident': _RE_PATTERN_IDENT, 'type': _RE_PATTERN_TYPE})
3709
3710
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  It is not
                   consulted by this function; it is kept so the signature
                   stays stable for callers.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  # Include lines get their own dedicated checker and nothing else.
  if _RE_PATTERN_INCLUDE.search(line):
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', line)
  if match:
    matched_new = match.group(1)
    matched_type = match.group(2)
    matched_funcptr = match.group(3)

    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts.  Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    #
    # std::function<> wrapper has a similar problem.
    #
    # Return types for function pointers also look like casts if they
    # don't have an extra space.
    if (matched_new is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Search(r'\bMockCallback<.*>', line) or
             Search(r'\bstd::function<.*>', line)) and
        not (matched_funcptr and
             Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                   matched_funcptr))):
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines.  The missing MOCK_METHOD is usually one or two
      # lines back, so scan back one or two lines.
      #
      # It's not possible for gmock macros to appear in the first 2
      # lines, since the class head + section name takes up 2 lines.
      if (linenum < 2 or
          not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                     clean_lines.elided[linenum - 1]) or
               Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                     clean_lines.elided[linenum - 2]))):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style.  '
              'Use static_cast<%s>(...) instead' %
              matched_type)

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases.  Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if not CheckCStyleCast(filename, linenum, line,
                         clean_lines.raw_lines[linenum],
                         'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  match = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
  if match and match.group(1) != '*':
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  #
  # Also ignore things that look like operators.  These are matched separately
  # because operator names cross non-word boundaries.  If we change the pattern
  # above, we would decrease the accuracy of matching identifiers.
  if (match and
      not Search(r'\boperator\W', line) and
      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # TODO(unknown): for header files (file_extension == 'h'), check that
  #                1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): for header files, check that classes have
  #                DISALLOW_EVIL_CONSTRUCTORS (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # The second argument is fine only when it is, in its entirety, an elided
  # character literal or a decimal/hex integer literal (optionally signed and
  # suffixed).  The alternatives are grouped and anchored at both ends so an
  # expression that merely starts with a digit, such as "4 * n", is still
  # reported.
  if match and not Match(
      r"^(?:''|-?(?:[0-9]+|0[xX][0-9A-Fa-f]+)[uUlL]*)\s*$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      ']' not in match.group(3)):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok):
        continue
      if Search(r'arraysize\(\w+\)', tok):
        continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok:
        continue
      if Match(r'\d+', tok):
        continue
      if Match(r'0[xX][0-9a-fA-F]+', tok):
        continue
      if Match(r'k[A-Z0-9]\w*', tok):
        continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok):
        continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
        continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4010
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in range(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  declaration_scopes = (_ClassInfo, _NamespaceInfo)
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif isinstance(nesting_state.stack[-1], declaration_scopes):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], declaration_scopes)):
      check_params = True  # just opened global/class/namespace block

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional explicit template argument after swap is "<[\w:]+>" so that
  # qualified names such as swap<std::string>( are recognized too.
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in range(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in _RE_PATTERN_REF_PARAM.findall(decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
erg@google.comc6671232013-10-25 21:44:03 +00004122
erg@google.com4e00b9a2009-01-12 23:05:11 +00004123
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Checks for a C-style cast by looking for the pattern.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group must capture the type name inside the parentheses.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # Everything on the line before the cast's opening parenthesis.
  prefix = line[0:match.start(1) - 1]

  # Exclude lines with sizeof, since sizeof looks like a cast.
  if Match(r'.*sizeof\s*$', prefix):
    return False

  # operator++(int) and operator--(int)
  if prefix.endswith((' operator++', ' operator--')):
    return False

  # A single unnamed argument for a function tends to look like old
  # style cast.  If we see those, don't issue warnings for deprecated
  # casts, instead issue warnings for unnamed arguments where
  # appropriate.
  #
  # These are things that we want warnings for, since the style guide
  # explicitly require all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort, where the compiler would be fine
  # if they had named parameters, but people often omit those
  # identifiers to reduce clutter:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[match.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
    # Looks like an unnamed parameter.

    # Don't warn on any kind of template arguments.
    if Match(r'^\s*>', remainder):
      return False

    # Don't warn on assignments to function pointers, but keep warnings for
    # unnamed parameters to pure virtual functions.  Note that this pattern
    # will also pass on assignments of "0" to function pointers, but the
    # preferred values for those would be "nullptr" or "NULL".
    #
    # Any amount of whitespace (including none) may precede the '=', in
    # agreement with the filter above that brought us here.
    matched_zero = Match(r'^\s*=\s*(\S+)\s*;', remainder)
    if matched_zero and matched_zero.group(1) != '0':
      return False

    # Don't warn on function pointer declarations.  For this we need
    # to check what came before the "(type)" string.
    if Match(r'.*\)\s*$', line[0:match.start(0)]):
      return False

    # Don't warn if the parameter is named with block comments, e.g.:
    #  Function(int /*unused_param*/);
    if '/*' in raw_line:
      return False

    # Passed all filters, issue warning here.
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
    return True

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast.  Use %s<%s>(...) instead' %
        (cast_type, match.group(1)))

  return True
4216
erg@google.com4e00b9a2009-01-12 23:05:11 +00004217
# Each entry pairs a standard header with the template names it provides.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Matches the bare word 'string'.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, template name, header) triples for <algorithm> functions.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append((
      re.compile(r'[^>.]\b%s(<.*?>)?\([^\)]' % _template),
      _template,
      '<algorithm>',
  ))

# (compiled pattern, 'name<>', header) triples, one per template listed in
# _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append((
        re.compile(r'(\<|\b)%s\s*\<' % _template),
        _template + '<>',
        _header,
    ))
4274
4275
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  # Strip at most one test suffix; '_unittest' takes precedence over '_test'.
  for test_suffix in ('_unittest', '_test'):
    if filename_cc.endswith(test_suffix):
      filename_cc = filename_cc[:-len(test_suffix)]
      break
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  files_belong_to_same_module = filename_cc.endswith(filename_h)
  common_path = ''
  if files_belong_to_same_module:
    # Slice by explicit length: filename_cc[:-len(filename_h)] would collapse
    # to filename_cc[:-0] == '' whenever the stripped header name is empty.
    common_path = filename_cc[:len(filename_cc) - len(filename_h)]
  return files_belong_to_same_module, common_path
4329
4330
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every #include found in the file is recorded in include_state unless an
  entry for that include already exists.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for linenum, line in enumerate(headerfile, 1):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle. An injected io factory may hand back a plain
    # iterable of lines that has no close(), so only call it when present.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
4358
4359
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Warns about STL entities used without including their header.

  Scans every line for uses of 'string', a handful of <algorithm> functions
  and the known STL templates, and reports an error for each required header
  that is not in include_state. Only one reason is kept per header: if both
  equal_to<> and less<> are used, only the one appearing later in the file is
  reported as the reason to include <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # Maps a header name to (line number, entity that needs the header).
  # Example: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    # Nothing to learn from empty lines or preprocessor directives.
    if not line or line.startswith('#'):
      continue

    # String is special -- it is a non-templatized type in STL.
    string_match = _RE_PATTERN_STRING.search(line)
    if string_match:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      before = line[:string_match.start()]
      if before.endswith('std::') or not before.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Every template pattern needs a '<', so lines without one are skipped.
    # This is purely a speed-up; it does not change the result.
    if '<' in line:
      for pattern, template, header in _re_pattern_templates:
        if pattern.search(line):
          required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Work on a copy of include_state so that the
  # entries added below stay local to this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  #
  # For Emacs's flymake: the file being linted may be a temporary copy whose
  # name ends with '_flymake.cc'. Map it back to the original name so that
  # we search for 'foo.h' rather than 'foo_flymake.h'.
  abs_filename = re.sub(r'_flymake\.cc$', '.cc',
                        FileInfo(filename).FullName())

  # include_state is modified while we go through it, so walk a snapshot of
  # its keys.
  for header in list(include_state.keys()):
    same_module, common_path = FilesBelongToSameModule(abs_filename, header)
    if same_module and UpdateIncludeState(common_path + header,
                                          include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header, (found_linenum, entity) in required.items():
    if required_header.strip('<>"') not in include_state:
      error(filename, found_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header + ' for ' + entity)
4451
4452
# Matches make_pair immediately followed (modulo whitespace) by a template
# argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flags calls to make_pair that spell out its template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(clean_lines.elided[linenum]):
    return
  error(filename, linenum, 'build/explicit_make_pair',
        4,  # 4 = high confidence
        'For C++11-compatibility, omit template arguments from make_pair'
        ' OR use pair directly OR if appropriate, construct a pair directly')
erg@google.com8a95ecc2011-09-08 00:45:54 +00004475
4476
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
  """Runs all per-line checks on one line of the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute
                 holds the unprocessed text of each line.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, called as
           error(filename, line number, category, confidence, message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error.
                           The default list is only iterated, never modified.
  """
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)

  # Lines inside an inline assembly block get no further checks.
  block_stack = nesting_state.stack
  if block_stack and block_stack[-1].inline_asm != _NO_ASM:
    return

  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)

  # The remaining built-in checks share the signature of the extra checks.
  for builtin_check in (CheckVlogArguments,
                        CheckPosixThreading,
                        CheckInvalidIncrement,
                        CheckMakePairUsesDeduction):
    builtin_check(filename, clean_lines, line, error)
  for extra_check in extra_check_functions:
    extra_check(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00004517
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=[]):
  """Lints the contents of one file, reporting problems through error.

  Whole-file checks run before and after the per-line pass performed by
  ProcessLine.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, called as
           error(filename, line number, category, confidence, message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  # Sentinel lines: the first makes line numbers equal to list indices, the
  # last gives the file a known final line. A new list is built, so the
  # caller's list is left untouched.
  first_marker = '// marker so line numbers and indices both start at 1'
  last_marker = '// marker so line numbers end in a known way'
  lines = [first_marker] + lines + [last_marker]

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # These two are given `lines` rather than `clean_lines`, which is why they
  # run here and not inside ProcessLine.
  CheckForBadCharacters(filename, lines, error)
  CheckForNewlineAtEOF(filename, lines, error)
4562
def ProcessFile(filename, vlevel, extra_check_functions=[]):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and hands the lines to ProcessFileData. Files whose extension is
  not one of cc, h, cpp, cu, cuh are skipped with a message on stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      contents = codecs.StreamReaderWriter(sys.stdin,
                                           codecs.getreader('utf8'),
                                           codecs.getwriter('utf8'),
                                           'replace').read()
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        contents = source.read()
      finally:
        # Always release the file handle, even if reading fails.
        source.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  lines = contents.split('\n')

  # Remove trailing '\r' and remember whether any was seen.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  valid_extensions = ['cc', 'h', 'cpp', 'cu', 'cuh']
  if filename != '-' and file_extension not in valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(.cc, .h, .cpp, .cu, .cuh)\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
4630
4631
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message. When given, the process exits with
             a 'FATAL ERROR' message; otherwise it exits with status 1.
  """
  sys.stderr.write(_USAGE)
  exit_argument = ('\nFATAL ERROR: ' + message) if message else 1
  sys.exit(exit_argument)
4643
4644
def PrintCategories():
  """Writes every error category to stderr, one per line, then exits with 0.

  These are the categories used to filter messages via --filter.
  """
  category_lines = [' %s\n' % cat for cat in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(category_lines))
  sys.exit(0)
4652
4653
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format and verbosity level as side-effects, as well
  as the module-level _root and _line_length values. Any invalid option or
  option value is reported through PrintUsage, which exits the program.

  Args:
    args: The command line arguments:

  Returns:
    The list of filenames to lint.
  """
  global _root
  global _line_length

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a non-numeric value as a usage error, the same way
      # --linelength does, instead of dying with a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty --filter= lists the available categories and exits.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
4715
4716
def main():
  """Lints every file named on the command line and exits.

  The exit status is non-zero when at least one error was counted.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for filename in filenames:
    ProcessFile(filename, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  had_errors = _cpplint_state.error_count > 0
  sys.exit(had_errors)


if __name__ == '__main__':
  main()