blob: 99152797f4e53a957779cf36ccca44a27f15a207 [file] [log] [blame]
erg@google.com720121a2012-05-11 16:31:47 +00001#!/usr/bin/python
erg@google.com4e00b9a2009-01-12 23:05:11 +00002#
erg@google.com8f91ab22011-09-06 21:04:45 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com4e00b9a2009-01-12 23:05:11 +00004#
erg@google.com969161c2009-06-26 22:06:46 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
erg@google.com4e00b9a2009-01-12 23:05:11 +00008#
erg@google.com969161c2009-06-26 22:06:46 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
erg@google.com4e00b9a2009-01-12 23:05:11 +000018#
erg@google.com969161c2009-06-26 22:06:46 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com4e00b9a2009-01-12 23:05:11 +000030
erg@google.com4e00b9a2009-01-12 23:05:11 +000031"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style. It does not attempt to fix
up these problems -- the point is to educate. It also does not
36attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
erg@google.comd350fe52013-01-14 17:51:48 +000045import copy
erg@google.com4e00b9a2009-01-12 23:05:11 +000046import getopt
47import math # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
56_USAGE = """
57Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.coma868d2d2009-10-09 21:18:45 +000058 [--counting=total|toplevel|detailed]
erg@google.com4e00b9a2009-01-12 23:05:11 +000059 <file> [file] ...
60
61 The style guidelines this tries to follow are those in
62 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
63
64 Every problem is given a confidence score from 1-5, with 5 meaning we are
65 certain of the problem, and 1 meaning it could be a legitimate construct.
66 This will miss some errors, and is not a substitute for a code review.
67
erg+personal@google.com05189642010-04-30 20:43:03 +000068 To suppress false-positive errors of a certain category, add a
69 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
70 suppresses errors of all categories on that line.
erg@google.com4e00b9a2009-01-12 23:05:11 +000071
72 The files passed in will be linted; at least one file must be provided.
73 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
74
75 Flags:
76
77 output=vs7
78 By default, the output is formatted to ease emacs parsing. Visual Studio
79 compatible output (vs7) may also be used. Other formats are unsupported.
80
81 verbose=#
82 Specify a number 0-5 to restrict errors to certain verbosity levels.
83
84 filter=-x,+y,...
85 Specify a comma-separated list of category-filters to apply: only
86 error messages whose category names pass the filters will be printed.
87 (Category names are printed with the message and look like
88 "[whitespace/indent]".) Filters are evaluated left to right.
89 "-FOO" and "FOO" means "do not print categories that start with FOO".
90 "+FOO" means "do print categories that start with FOO".
91
92 Examples: --filter=-whitespace,+whitespace/braces
93 --filter=whitespace,runtime/printf,+runtime/printf_format
94 --filter=-,+build/include_what_you_use
95
96 To see a list of all the categories used in cpplint, pass no arg:
97 --filter=
erg@google.coma868d2d2009-10-09 21:18:45 +000098
99 counting=total|toplevel|detailed
100 The total number of errors found is always printed. If
101 'toplevel' is provided, then the count of errors in each of
102 the top-level categories like 'build' and 'whitespace' will
103 also be printed. If 'detailed' is provided, then a count
104 is provided for each category like 'build/class'.
erg@google.com4d70a882013-04-16 21:06:32 +0000105
106 root=subdir
107 The root directory used for deriving header guard CPP variable.
108 By default, the header guard CPP variable is calculated as the relative
109 path to the directory that contains .git, .hg, or .svn. When this flag
110 is specified, the relative path is calculated from the specified
111 directory. If the specified directory does not exist, this flag is
112 ignored.
113
114 Examples:
      Assuming that src/.git exists, the header guard CPP variables for
116 src/chrome/browser/ui/browser.h are:
117
118 No flag => CHROME_BROWSER_UI_BROWSER_H_
119 --root=chrome => BROWSER_UI_BROWSER_H_
120 --root=chrome/browser => UI_BROWSER_H_
erg@google.com4e00b9a2009-01-12 23:05:11 +0000121"""
122
# Every error message cpplint prints carries a category.  The categories are
# listed explicitly so that "cpplint --filter=" can print all of them.  When
# you introduce a message with a new category, add the category to this list;
# cpplint_unittest.py should tell you if you forget to do this.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = [
    'build/class',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/function',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/streams',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
    ]

# The default state of the category filter.  This is overridden by the
# --filter= flag.  By default all errors are on, so only list categories here
# that should be off by default (i.e., categories that must be enabled through
# the --filter= flag).  Every entry must start with a '-' or '+', exactly as
# in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.come35f7652009-06-19 20:52:09 +0000195
erg@google.com4e00b9a2009-01-12 23:05:11 +0000196# We used to check for high-bit characters, but after much discussion we
197# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com8a95ecc2011-09-08 00:45:54 +0000198# hard-coded international strings, which belong in a separate i18n file.
erg@google.com4e00b9a2009-01-12 23:05:11 +0000199
erg@google.com4e00b9a2009-01-12 23:05:11 +0000200
erg@google.comfd5da632013-10-25 17:39:45 +0000201# C++ headers
erg@google.com4e00b9a2009-01-12 23:05:11 +0000202_CPP_HEADERS = frozenset([
erg@google.comfd5da632013-10-25 17:39:45 +0000203 # Legacy
204 'algobase.h',
205 'algo.h',
206 'alloc.h',
207 'builtinbuf.h',
208 'bvector.h',
209 'complex.h',
210 'defalloc.h',
211 'deque.h',
212 'editbuf.h',
213 'fstream.h',
214 'function.h',
215 'hash_map',
216 'hash_map.h',
217 'hash_set',
218 'hash_set.h',
219 'hashtable.h',
220 'heap.h',
221 'indstream.h',
222 'iomanip.h',
223 'iostream.h',
224 'istream.h',
225 'iterator.h',
226 'list.h',
227 'map.h',
228 'multimap.h',
229 'multiset.h',
230 'ostream.h',
231 'pair.h',
232 'parsestream.h',
233 'pfstream.h',
234 'procbuf.h',
235 'pthread_alloc',
236 'pthread_alloc.h',
237 'rope',
238 'rope.h',
239 'ropeimpl.h',
240 'set.h',
241 'slist',
242 'slist.h',
243 'stack.h',
244 'stdiostream.h',
245 'stl_alloc.h',
246 'stl_relops.h',
247 'streambuf.h',
248 'stream.h',
249 'strfile.h',
250 'strstream.h',
251 'tempbuf.h',
252 'tree.h',
253 'type_traits.h',
254 'vector.h',
255 # 17.6.1.2 C++ library headers
256 'algorithm',
257 'array',
258 'atomic',
259 'bitset',
260 'chrono',
261 'codecvt',
262 'complex',
263 'condition_variable',
264 'deque',
265 'exception',
266 'forward_list',
267 'fstream',
268 'functional',
269 'future',
270 'initializer_list',
271 'iomanip',
272 'ios',
273 'iosfwd',
274 'iostream',
275 'istream',
276 'iterator',
277 'limits',
278 'list',
279 'locale',
280 'map',
281 'memory',
282 'mutex',
283 'new',
284 'numeric',
285 'ostream',
286 'queue',
287 'random',
288 'ratio',
289 'regex',
290 'set',
291 'sstream',
292 'stack',
293 'stdexcept',
294 'streambuf',
295 'string',
296 'strstream',
297 'system_error',
298 'thread',
299 'tuple',
300 'typeindex',
301 'typeinfo',
302 'type_traits',
303 'unordered_map',
304 'unordered_set',
305 'utility',
erg@google.com5d00c562013-07-12 19:57:05 +0000306 'valarray',
erg@google.comfd5da632013-10-25 17:39:45 +0000307 'vector',
308 # 17.6.1.2 C++ headers for C library facilities
309 'cassert',
310 'ccomplex',
311 'cctype',
312 'cerrno',
313 'cfenv',
314 'cfloat',
315 'cinttypes',
316 'ciso646',
317 'climits',
318 'clocale',
319 'cmath',
320 'csetjmp',
321 'csignal',
322 'cstdalign',
323 'cstdarg',
324 'cstdbool',
325 'cstddef',
326 'cstdint',
327 'cstdio',
328 'cstdlib',
329 'cstring',
330 'ctgmath',
331 'ctime',
332 'cuchar',
333 'cwchar',
334 'cwctype',
erg@google.com4e00b9a2009-01-12 23:05:11 +0000335 ])
336
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Maps each macro above to a {comparison operator: replacement macro} dict,
# i.e. the replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Each row is (operator, suffix for the positive macros, suffix for the
# *_FALSE macros).  The *_FALSE macros assert the negated comparison, so
# they take the inverse suffix.
for op, replacement, inv_replacement in [('==', 'EQ', 'NE'),
                                         ('!=', 'NE', 'EQ'),
                                         ('>=', 'GE', 'LT'),
                                         ('>', 'GT', 'LE'),
                                         ('<=', 'LE', 'GT'),
                                         ('<', 'LT', 'GE')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
368
# Alternative tokens mapped to the operators they stand for.  For the full
# list, see section 2.5 Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are left out because matching those on a word
# boundary is a mess.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!=',
    }

# Regular expression matching any of the keywords above.  The leading
# "[ =()]" is meant to avoid matching these keywords outside of boolean
# expressions.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
395
erg@google.com4e00b9a2009-01-12 23:05:11 +0000396
# Header type constants for use with _IncludeState.CheckNextIncludeOrder().
# They are numbered 1 through 5 in the order listed.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)

# Inline assembly states.
_NO_ASM = 0      # Outside of inline assembly block
_INSIDE_ASM = 1  # Inside inline assembly block
_END_ASM = 2     # Last line of inline assembly block
_BLOCK_ASM = 3   # The whole block is an inline assembly block

# Matches the start of an assembly block: an asm keyword, an optional
# volatile qualifier (captured as group 1), then an opening brace or paren.
_MATCH_ASM = re.compile(
    r'^\s*(?:asm|_asm|__asm|__asm__)(?:\s+(volatile|__volatile__))?\s*[{(]')
415
erg@google.com4e00b9a2009-01-12 23:05:11 +0000416
# Compiled regular expressions, keyed by their pattern string.
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1 is the parenthesized
# part, parentheses included, or None when it is absent.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None
429
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on a line, if there is one.

  Searches the line for a NOLINT comment and updates the global
  error-suppressions store accordingly.  A bare NOLINT or NOLINT(*)
  suppresses every category on the line; NOLINT(category) suppresses that
  one category.  An unknown category is reported through the error handler.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  spec = found.group(1)
  if spec is None or spec == '(*)':
    # Suppress-all is stored under the None key.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if spec.startswith('(') and spec.endswith(')'):
    name = spec[1:-1]
    if name in _ERROR_CATEGORIES:
      _error_suppressions.setdefault(name, set()).add(linenum)
    else:
      error(filename, linenum, 'readability/nolint', 5,
            'Unknown NOLINT error category: %s' % name)
erg+personal@google.com05189642010-04-30 20:43:03 +0000457
458
def ResetNolintSuppressions():
  """Empties the global store of NOLINT suppressions.

  The store is cleared in place, so the same dict object stays in use.
  """
  _error_suppressions.clear()
462
463
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Looks the line up in the global error_suppressions map that
  ParseNolintSuppressions fills in and ResetNolintSuppressions clears.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  # The line may be suppressed for this category specifically, or for all
  # categories (stored under the None key).
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
erg@google.com4e00b9a2009-01-12 23:05:11 +0000478
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression to apply.
    s: str, the string to match; matching is anchored at its start.

  Returns:
    The match object, or None if the pattern does not match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal
    # module that is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
487
488
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal
    # module that is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
505
506
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression to look for.
    s: str, the string to scan; the pattern may match at any position.

  Returns:
    The match object for the first match, or None if there is none.
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the public entry point; sre_compile is an internal
    # module that is deprecated in newer Python releases.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
512
513
class _IncludeState(dict):
  """Tracks line numbers for includes, and the order in which includes appear.

  As a dict, an _IncludeState object serves as a mapping between include
  filename and line number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, passing
  in the type constants defined above.  A call made in an illegal order
  returns a non-empty error message describing the problem.
  """
  # self._section only ever moves forward through these values.  When a
  # C or C++ system header would require moving backwards,
  # CheckNextIncludeOrder reports an error instead.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    dict.__init__(self)
    # The section the most recently checked header put us in.
    self._section = self._INITIAL_SECTION
    # The path of the last header seen; '' right after a section change.
    self._last_header = ''

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - removes '-inl' since we don't require them to be after the main header.
    - replaces "-" with "_" so they both cmp the same.
    - lowercase everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    canonical = header_path.replace('-inl.h', '.h')
    canonical = canonical.replace('-', '_')
    return canonical.lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Check if a header is in alphabetical order with the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # After a section change _last_header is the empty string, which never
    # sorts after the current header.
    if not self._last_header > header_path:
      return True
    # Out of order, but if the previous line is blank assume the headers
    # are intentionally sorted the way they are.
    if Match(r'^\s*$', clean_lines.elided[linenum - 1]):
      return True
    return False

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.
    """
    # Built up front, while self._section still names the previous section.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))
    previous_section = self._section

    if header_type == _C_SYS_HEADER or header_type == _CPP_SYS_HEADER:
      if header_type == _C_SYS_HEADER:
        wanted_section = self._C_SECTION
      else:
        wanted_section = self._CPP_SECTION
      if self._section > wanted_section:
        # A system header after a later section is the one reported error.
        self._last_header = ''
        return error_message
      self._section = wanted_section
    elif (header_type == _LIKELY_MY_HEADER or
          header_type == _POSSIBLE_MY_HEADER):
      # Once past the "my header" section such a header simply counts as an
      # "other" header; for the possible case we are not sure enough that
      # the header is associated with this file anyway.
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical order is only compared within a single section.
    if previous_section != self._section:
      self._last_header = ''

    return ''
646
647
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones, so they
    # go first and the flag filters are appended after them.
    self.filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        self.filters.append(clean_filt)
    for filt in self.filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: str, the category of the error, e.g. 'build/class'.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        # 'toplevel' counts by the part before the first slash only.
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # items() rather than iteritems(): the latter does not exist on
    # Python 3, while items() works on both major versions.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com4e00b9a2009-01-12 23:05:11 +0000724
# The single _CppLintState instance behind the module-level helpers below.
_cpplint_state = _CppLintState()


def _OutputFormat():
  """Returns the output format currently held by the module state."""
  return _cpplint_state.output_format
731
732
def _SetOutputFormat(output_format):
  """Stores output_format as the module's output format."""
  _cpplint_state.SetOutputFormat(output_format)
736
737
def _VerboseLevel():
  """Returns the verbosity level currently held by the module state."""
  return _cpplint_state.verbose_level
741
742
def _SetVerboseLevel(level):
  """Changes the module's verbosity to level; returns the previous level."""
  return _cpplint_state.SetVerboseLevel(level)
746
747
def _SetCountingStyle(level):
  """Stores level as the module's error-counting style."""
  _cpplint_state.SetCountingStyle(level)
751
752
def _Filters():
  """Returns the list of output filters held by the module state."""
  return _cpplint_state.filters
756
757
def _SetFilters(filters):
  """Replaces the module's error-message filters.

  The filters decide whether a given error message gets emitted.  The work
  is delegated to the module state's SetFilters method.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
769
770
class _FunctionState(object):
  """Tracks current function name and the number of lines in its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Start analyzing function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Count line in current function body."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    # Functions whose name starts with TEST or Test get the larger allowance.
    base_trigger = self._NORMAL_TRIGGER
    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The level is the base-2 log of how many times over the base trigger
    # the function is (1x => 0, 2x => 1, 4x => 2, ...), capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).' % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False
825
826
827class _IncludeError(Exception):
828 """Indicates a problem with the include order in a file."""
829 pass
830
831
class FileInfo:
  """Helper for querying the pieces of a file's path.

  The path components are reported relative to the project (repository) root
  whenever that root can be detected.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Returns the absolute path, with backslashes turned into slashes."""
    absolute = os.path.abspath(self._filename)
    return absolute.replace('\\', '/')

  def RepositoryName(self):
    r"""Returns FullName() with the local path to the checkout removed.

    If the file exists we try to detect the root of the checkout and drop
    /path/to/checkout from the name, so that header guards do not contain
    things like "C:\Documents and Settings\..." or "/home/username/..." and
    people who checked the source out to different locations do not see bogus
    errors. When no checkout root is found the full name is returned as is.
    """
    fullname = self.FullName()
    if not os.path.exists(fullname):
      # Don't know what to do; header guard warnings may be wrong...
      return fullname

    project_dir = os.path.dirname(fullname)

    def _RelativeTo(checkout_root):
      prefix = os.path.commonprefix([checkout_root, project_dir])
      return fullname[len(prefix) + 1:]

    def _HasVcsMarker(directory):
      return any(os.path.exists(os.path.join(directory, marker))
                 for marker in (".git", ".hg", ".svn"))

    if os.path.exists(os.path.join(project_dir, ".svn")):
      # A .svn entry next to the file: climb for as long as the parent
      # directory also has one, to reach the top of the SVN checkout.
      root_dir = project_dir
      one_up_dir = os.path.dirname(root_dir)
      while os.path.exists(os.path.join(one_up_dir, ".svn")):
        root_dir = os.path.dirname(root_dir)
        one_up_dir = os.path.dirname(one_up_dir)
      return _RelativeTo(root_dir)

    # Not SVN <= 1.6? Search upwards from the file's directory for a git, hg,
    # or svn top level directory, stopping at the filesystem root.
    root_dir = os.path.dirname(fullname)
    while (root_dir != os.path.dirname(root_dir) and
           not _HasVcsMarker(root_dir)):
      root_dir = os.path.dirname(root_dir)

    if _HasVcsMarker(root_dir):
      return _RelativeTo(root_dir)

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the repository name into directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """
    directory, leaf = os.path.split(self.RepositoryName())
    return (directory,) + os.path.splitext(leaf)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Returns the directory and base name joined by '/', minus extension."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """Returns True if the extension is one of c, cc, cpp or cxx."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
920
921
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of the given category should be emitted.

  An error is dropped when any of the following holds:
    - IsErrorSuppressedByNolint() reports it as suppressed for this line
      (a "NOLINT(category)" comment appears in the source),
    - its confidence is below the configured verbosity level, or
    - the last matching entry in the filter list is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  is_filtered = False
  for one_filter in _Filters():
    sign, prefix = one_filter[:1], one_filter[1:]
    if sign == '-':
      if category.startswith(prefix):
        is_filtered = True
    elif sign == '+':
      if category.startswith(prefix):
        is_filtered = False
    else:
      assert False  # should have been checked for in SetFilter.

  return not is_filtered
947
948
def Error(filename, linenum, category, confidence, message):
  """Records a lint error and writes it to stderr.

  Besides the location of the error we report our confidence in it, that is,
  how certain we are this is a legitimate style regression, and not a
  misidentification or a use that's sometimes justified.

  False positives can be suppressed by the use of "NOLINT(category)"
  comments on the offending line. These are parsed into
  _error_suppressions.

  Nothing is counted or written when _ShouldPrintError() rejects the error.
  The line layout depends on _cpplint_state.output_format ('vs7', 'eclipse',
  or anything else for the default layout).

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime". Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s): %s [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s [%s] [%d]\n'
  else:
    template = '%s:%s: %s [%s] [%d]\n'
  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
982
983
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters. Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it also
# decides which of the surrounding spaces to remove, so that comments inside
# statements are handled better.
# The current rule is: we only clear spaces on both sides when the comment is
# at the end of the line. Otherwise, we try to remove spaces on the right
# side; if that doesn't work we remove them on the left side, but only if the
# comment is followed by a non-word character.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
1004
1005
def IsCppString(line):
  """Tells whether the text ends inside an unterminated string constant.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: A partial line of code, starting at column 0 and cut off at the
      position of interest.

  Returns:
    True if the next character appended to 'line' would be inside a
    string constant.
  """
  # Neutralise escaped backslashes first, so that \\" is not read as \".
  text = line.replace(r'\\', 'XX')
  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  quote_char_literals = text.count("'\"'")
  # An odd number of real string delimiters means a string is still open.
  return (all_quotes - escaped_quotes - quote_char_literals) % 2 == 1
1021
1022
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment continuing on later lines.

  Args:
    lines: The list of source lines.
    lineix: Index of the first line to examine.

  Returns:
    The index of the first line at or after lineix whose stripped text starts
    with '/*' and has no '*/' after that marker, or len(lines) if none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Only report this marker if the comment goes beyond this line.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
1032
1033
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: The list of source lines.
    lineix: Index of the first line to examine.

  Returns:
    The index of the first line at or after lineix whose stripped text ends
    with '*/', or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1041
1042
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with a placeholder comment.

  Args:
    lines: The list of source lines; modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1049
1050
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multiline (C-style) comment in lines, in place.

  Each comment found by FindNextMultiLineCommentStart() is replaced, from its
  first line through its closing line, by placeholder lines. A comment with
  no closing line is reported and processing stops there.

  Args:
    filename: The name of the current file.
    lines: The list of source lines; modified in place.
    error: The function to call with any errors found.
  """
  search_from = 0
  while search_from < len(lines):
    comment_begin = FindNextMultiLineCommentStart(lines, search_from)
    if comment_begin >= len(lines):
      return
    comment_end = FindNextMultiLineCommentEnd(lines, comment_begin)
    if comment_end >= len(lines):
      error(filename, comment_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    search_from = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_begin, search_from)
1065
1066
def CleanseComments(line):
  """Strips //-comments and single-line C-style /* */ comments from a line.

  Only the first '//' on the line is considered, and it is left alone when
  IsCppString() says it sits inside a string constant.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  comment_start = line.find('//')
  if comment_start != -1:
    code_part = line[:comment_start]
    if not IsCppString(code_part):
      line = code_part.rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1081
1082
class CleansedLines(object):
  """Keeps three views of the same file, each preprocessed differently.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments, and
  3) raw_lines member contains all the lines without processing.
  All these three members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'.
    Lines matched by _RE_PATTERN_INCLUDE are returned unchanged.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Drop escaped characters first to make the quote collapsing below basic.
    # Things that look like escaped characters shouldn't occur outside of
    # strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    collapsed = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
    return collapsed
1126
1127
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  The line is scanned from startpos. Every startchar increases the nesting
  depth and every endchar decreases it; the scan stops at the endchar that
  brings the depth back to zero.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    Index just after endchar, or -1 if the expression is not closed on this
    line.
  """
  # range() rather than xrange(): it exists on both Python 2 and Python 3 and
  # matches the other loops in this file.
  for i in range(startpos, len(line)):
    char = line[i]
    if char == startchar:
      depth += 1
    elif char == endchar:
      depth -= 1
      if depth == 0:
        return i + 1
  return -1
1149
1150
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close. Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  not_found = clean_lines.NumLines()

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[':
    return (line, not_found, -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  # Check first line
  end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
  if end_pos > -1:
    return (line, linenum, end_pos)

  # Carry the number of still-open brackets over to the following lines. A
  # line is searched in detail only once its bracket balance closes them all.
  remainder = line[pos:]
  num_open = remainder.count(startchar) - remainder.count(endchar)
  last_linenum = clean_lines.NumLines() - 1
  while linenum < last_linenum:
    linenum += 1
    line = clean_lines.elided[linenum]
    delta = line.count(startchar) - line.count(endchar)
    if num_open + delta <= 0:
      closing_pos = FindEndOfExpressionInLine(
          line, 0, num_open, startchar, endchar)
      return (line, linenum, closing_pos)
    num_open += delta

  # Did not find endchar before end of file, give up
  return (line, not_found, -1)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001194
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file. Index 0
      is a dummy line and is not examined.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front, hence the slice starting at index 1.
  # (A slice instead of xrange() keeps this working on Python 2 and 3.)
  if any(re.search(r'Copyright', line, re.I) for line in lines[1:11]):
    return
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1206
1207
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is the file's repository-relative path (with a leading
  "<_root>/" removed when _root is set), upper-cased, with every '-', '.',
  '/' and whitespace character replaced by '_', and a trailing '_' appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # Strip the root as a literal prefix instead of splicing it into a regex:
    # _root is a directory name, not a pattern, and RepositoryName() always
    # uses '/' as separator (FullName() converts backslashes), so os.sep must
    # not be used for matching. On Windows the old pattern ended in a lone
    # backslash, which is not even a valid regular expression.
    root_prefix = _root.replace(os.sep, '/') + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com4e00b9a2009-01-12 23:05:11 +00001230
1231
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a well-formed header guard.

  Logs an error if no #ifndef / #define header guard is present. Otherwise
  checks that the guard name is the one derived from the full pathname, that
  #ifndef and #define agree, and that the last #endif carries the matching
  comment.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef = argument
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = argument
    # The last line starting with #endif wins; the entire line is kept.
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; that variant is reported at level 0.
  legacy_cppvar = cppvar + '_'
  if ifndef != cppvar:
    error_level = 0 if ifndef == legacy_cppvar else 5

    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  if endif != ('#endif // %s' % cppvar):
    error_level = 0 if endif == ('#endif // %s' % legacy_cppvar) else 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1305
1306
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Reports every line that contains a Unicode replacement character.

  These indicate that either the file contained invalid UTF-8 (likely)
  or Unicode replacement characters (which it shouldn't). Note that
  it's possible for this to throw off line numbering if the invalid
  UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, text in enumerate(lines):
    if u'\ufffd' not in text:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1323 'Line contains invalid UTF-8 (or Unicode replacement character).')
1324
1325
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1342
1343
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other. Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment runs past this line.
  unclosed_comments = line.count('/*') - line.count('*/')
  if unclosed_comments > 0:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes: a string runs past this line.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1380
1381
# Pairs of (call to look for, suggested "_r" replacement). Each entry ends
# with the opening parenthesis of the call; CheckPosixThreading() searches
# lines for the first element and recommends the second one.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1396
1397
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Warns about calls to the thread-unsafe functions in threading_list.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Only the first occurrence of each function on the line is examined. It is
  ignored when the preceding character is alphanumeric or one of '_', '.',
  '>', i.e. when it is part of a longer name or a member call.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for unsafe_call, safe_call in threading_list:
    found_at = line.find(unsafe_call)
    if found_at < 0:
      continue
    if found_at > 0:
      previous = line[found_at - 1]
      if previous.isalnum() or previous in ('_', '.', '>'):
        continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_call +
          '...) instead of ' + unsafe_call +
          '...) for improved thread safety.')
1423
1424
# Matches an invalid increment or decrement statement such as "*count++;" or
# "*count--;" at the start of a line (after optional whitespace), which moves
# the pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
1429
1430
def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Flags statements of the form *count++ (or *count--).

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1451
1452
erg@google.comd350fe52013-01-14 17:51:48 +00001453class _BlockInfo(object):
1454 """Stores information about a generic block of code."""
1455
1456 def __init__(self, seen_open_brace):
1457 self.seen_open_brace = seen_open_brace
1458 self.open_parentheses = 0
1459 self.inline_asm = _NO_ASM
1460
1461 def CheckBegin(self, filename, clean_lines, linenum, error):
1462 """Run checks that applies to text up to the opening brace.
1463
1464 This is mostly for checking the text after the class identifier
1465 and the "{", usually where the base class is specified. For other
1466 blocks, there isn't much to check, so we always pass.
1467
1468 Args:
1469 filename: The name of the current file.
1470 clean_lines: A CleansedLines instance containing the file.
1471 linenum: The number of the line to check.
1472 error: The function to call with any errors found.
1473 """
1474 pass
1475
1476 def CheckEnd(self, filename, clean_lines, linenum, error):
1477 """Run checks that applies to text after the closing brace.
1478
1479 This is mostly used for checking end of namespace comments.
1480
1481 Args:
1482 filename: The name of the current file.
1483 clean_lines: A CleansedLines instance containing the file.
1484 linenum: The number of the line to check.
1485 error: The function to call with any errors found.
1486 """
1487 pass
1488
1489
class _ClassInfo(_BlockInfo):
  """Holds the state kept for one class or struct."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    # Members are public by default in a struct and private in a class.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Remember initial indentation level for this class. Using raw_lines here
    # instead of elided to account for leading comments.
    leading_spaces = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    self.class_indent = len(leading_spaces.group(1)) if leading_spaces else 0

    # Try to find the end of the class: the first line, starting here, at
    # which the braces balance out. This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_depth = 0
    for candidate in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[candidate]
      brace_depth += text.count('{') - text.count('}')
      if brace_depth == 0:
        self.last_line = candidate
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived if the line contains a bare ':'."""
    # A bare ':' is one that is not part of a '::'.
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the start of the class.

    Only closing braces preceded by nothing but spaces are checked, which
    means single-line class definitions are not checked.
    """
    closing = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not closing:
      return
    if len(closing.group(1)) == self.class_indent:
      return
    keyword = 'struct ' if self.is_struct else 'class '
    parent = keyword + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1544
erg@google.com4e00b9a2009-01-12 23:05:11 +00001545
class _NamespaceInfo(_BlockInfo):
  """Holds the state kept for one namespace."""

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks the comment that terminates the namespace.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace. Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines. However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations). There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      expected = (r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                  r'[\*/\.\\\s]*$')
      message = ('Namespace should be terminated with "// namespace %s"' %
                 self.name)
    else:
      # Anonymous namespace
      expected = r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$'
      message = 'Namespace should be terminated with "// namespace"'

    if not Match(expected, line):
      error(filename, linenum, 'readability/namespace', 5, message)
1598
1599
1600class _PreprocessorInfo(object):
1601 """Stores checkpoints of nesting stacks when #if/#else is seen."""
1602
1603 def __init__(self, stack_before_if):
1604 # The entire nesting stack before #if
1605 self.stack_before_if = stack_before_if
1606
1607 # The entire nesting stack up to #else
1608 self.stack_before_else = []
1609
1610 # Whether we have already seen #else or #elif
1611 self.seen_else = False
1612
1613
1614class _NestingState(object):
1615 """Holds states related to parsing braces."""
erg@google.com4e00b9a2009-01-12 23:05:11 +00001616
1617 def __init__(self):
erg@google.comd350fe52013-01-14 17:51:48 +00001618 # Stack for tracking all braces. An object is pushed whenever we
1619 # see a "{", and popped when we see a "}". Only 3 types of
1620 # objects are possible:
1621 # - _ClassInfo: a class or struct.
1622 # - _NamespaceInfo: a namespace.
1623 # - _BlockInfo: some other type of block.
1624 self.stack = []
erg@google.com4e00b9a2009-01-12 23:05:11 +00001625
erg@google.comd350fe52013-01-14 17:51:48 +00001626 # Stack of _PreprocessorInfo objects.
1627 self.pp_stack = []
1628
1629 def SeenOpenBrace(self):
1630 """Check if we have seen the opening brace for the innermost block.
1631
1632 Returns:
1633 True if we have seen the opening brace, False if the innermost
1634 block is still expecting an opening brace.
1635 """
1636 return (not self.stack) or self.stack[-1].seen_open_brace
1637
1638 def InNamespaceBody(self):
1639 """Check if we are currently one level inside a namespace body.
1640
1641 Returns:
1642 True if top of the stack is a namespace block, False otherwise.
1643 """
1644 return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
1645
1646 def UpdatePreprocessor(self, line):
1647 """Update preprocessor stack.
1648
1649 We need to handle preprocessors due to classes like this:
1650 #ifdef SWIG
1651 struct ResultDetailsPageElementExtensionPoint {
1652 #else
1653 struct ResultDetailsPageElementExtensionPoint : public Extension {
1654 #endif
erg@google.comd350fe52013-01-14 17:51:48 +00001655
1656 We make the following assumptions (good enough for most files):
1657 - Preprocessor condition evaluates to true from #if up to first
1658 #else/#elif/#endif.
1659
1660 - Preprocessor condition evaluates to false from #else/#elif up
1661 to #endif. We still perform lint checks on these lines, but
1662 these do not affect nesting stack.
1663
1664 Args:
1665 line: current line to check.
1666 """
1667 if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
1668 # Beginning of #if block, save the nesting stack here. The saved
1669 # stack will allow us to restore the parsing state in the #else case.
1670 self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
1671 elif Match(r'^\s*#\s*(else|elif)\b', line):
1672 # Beginning of #else block
1673 if self.pp_stack:
1674 if not self.pp_stack[-1].seen_else:
1675 # This is the first #else or #elif block. Remember the
1676 # whole nesting stack up to this point. This is what we
1677 # keep after the #endif.
1678 self.pp_stack[-1].seen_else = True
1679 self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
1680
1681 # Restore the stack to how it was before the #if
1682 self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
1683 else:
1684 # TODO(unknown): unexpected #else, issue warning?
1685 pass
1686 elif Match(r'^\s*#\s*endif\b', line):
1687 # End of #if or #else blocks.
1688 if self.pp_stack:
1689 # If we saw an #else, we will need to restore the nesting
1690 # stack to its former state before the #else, otherwise we
1691 # will just continue from where we left off.
1692 if self.pp_stack[-1].seen_else:
1693 # Here we can just use a shallow copy since we are the last
1694 # reference to it.
1695 self.stack = self.pp_stack[-1].stack_before_else
1696 # Drop the corresponding #if
1697 self.pp_stack.pop()
1698 else:
1699 # TODO(unknown): unexpected #endif, issue warning?
1700 pass
1701
1702 def Update(self, filename, clean_lines, linenum, error):
1703 """Update nesting state with current line.
1704
1705 Args:
1706 filename: The name of the current file.
1707 clean_lines: A CleansedLines instance containing the file.
1708 linenum: The number of the line to check.
1709 error: The function to call with any errors found.
1710 """
1711 line = clean_lines.elided[linenum]
1712
1713 # Update pp_stack first
1714 self.UpdatePreprocessor(line)
1715
1716 # Count parentheses. This is to avoid adding struct arguments to
1717 # the nesting stack.
1718 if self.stack:
1719 inner_block = self.stack[-1]
1720 depth_change = line.count('(') - line.count(')')
1721 inner_block.open_parentheses += depth_change
1722
1723 # Also check if we are starting or ending an inline assembly block.
1724 if inner_block.inline_asm in (_NO_ASM, _END_ASM):
1725 if (depth_change != 0 and
1726 inner_block.open_parentheses == 1 and
1727 _MATCH_ASM.match(line)):
1728 # Enter assembly block
1729 inner_block.inline_asm = _INSIDE_ASM
1730 else:
1731 # Not entering assembly block. If previous line was _END_ASM,
1732 # we will now shift to _NO_ASM state.
1733 inner_block.inline_asm = _NO_ASM
1734 elif (inner_block.inline_asm == _INSIDE_ASM and
1735 inner_block.open_parentheses == 0):
1736 # Exit assembly block
1737 inner_block.inline_asm = _END_ASM
1738
1739 # Consume namespace declaration at the beginning of the line. Do
1740 # this in a loop so that we catch same line declarations like this:
1741 # namespace proto2 { namespace bridge { class MessageSet; } }
1742 while True:
1743 # Match start of namespace. The "\b\s*" below catches namespace
1744 # declarations even if it weren't followed by a whitespace, this
1745 # is so that we don't confuse our namespace checker. The
1746 # missing spaces will be flagged by CheckSpacing.
1747 namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
1748 if not namespace_decl_match:
1749 break
1750
1751 new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
1752 self.stack.append(new_namespace)
1753
1754 line = namespace_decl_match.group(2)
1755 if line.find('{') != -1:
1756 new_namespace.seen_open_brace = True
1757 line = line[line.find('{') + 1:]
1758
1759 # Look for a class declaration in whatever is left of the line
1760 # after parsing namespaces. The regexp accounts for decorated classes
1761 # such as in:
1762 # class LOCKABLE API Object {
1763 # };
1764 #
1765 # Templates with class arguments may confuse the parser, for example:
1766 # template <class T
1767 # class Comparator = less<T>,
1768 # class Vector = vector<T> >
1769 # class HeapQueue {
1770 #
1771 # Because this parser has no nesting state about templates, by the
1772 # time it saw "class Comparator", it may think that it's a new class.
1773 # Nested templates have a similar problem:
1774 # template <
1775 # typename ExportedType,
1776 # typename TupleType,
1777 # template <typename, typename> class ImplTemplate>
1778 #
1779 # To avoid these cases, we ignore classes that are followed by '=' or '>'
1780 class_decl_match = Match(
1781 r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
erg@google.comfd5da632013-10-25 17:39:45 +00001782 r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
1783 r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
erg@google.comd350fe52013-01-14 17:51:48 +00001784 if (class_decl_match and
1785 (not self.stack or self.stack[-1].open_parentheses == 0)):
1786 self.stack.append(_ClassInfo(
1787 class_decl_match.group(4), class_decl_match.group(2),
1788 clean_lines, linenum))
1789 line = class_decl_match.group(5)
1790
1791 # If we have not yet seen the opening brace for the innermost block,
1792 # run checks here.
1793 if not self.SeenOpenBrace():
1794 self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
1795
1796 # Update access control if we are inside a class/struct
1797 if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.comfd5da632013-10-25 17:39:45 +00001798 classinfo = self.stack[-1]
1799 access_match = Match(
1800 r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
1801 r':(?:[^:]|$)',
1802 line)
erg@google.comd350fe52013-01-14 17:51:48 +00001803 if access_match:
erg@google.comfd5da632013-10-25 17:39:45 +00001804 classinfo.access = access_match.group(2)
1805
1806 # Check that access keywords are indented +1 space. Skip this
erg@google.comc6671232013-10-25 21:44:03 +00001807 # check if the keywords are not preceded by whitespaces.
erg@google.comfd5da632013-10-25 17:39:45 +00001808 indent = access_match.group(1)
1809 if (len(indent) != classinfo.class_indent + 1 and
1810 Match(r'^\s*$', indent)):
1811 if classinfo.is_struct:
1812 parent = 'struct ' + classinfo.name
1813 else:
1814 parent = 'class ' + classinfo.name
1815 slots = ''
1816 if access_match.group(3):
1817 slots = access_match.group(3)
1818 error(filename, linenum, 'whitespace/indent', 3,
1819 '%s%s: should be indented +1 space inside %s' % (
1820 access_match.group(2), slots, parent))
erg@google.comd350fe52013-01-14 17:51:48 +00001821
1822 # Consume braces or semicolons from what's left of the line
1823 while True:
1824 # Match first brace, semicolon, or closed parenthesis.
1825 matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
1826 if not matched:
1827 break
1828
1829 token = matched.group(1)
1830 if token == '{':
1831 # If namespace or class hasn't seen a opening brace yet, mark
1832 # namespace/class head as complete. Push a new block onto the
1833 # stack otherwise.
1834 if not self.SeenOpenBrace():
1835 self.stack[-1].seen_open_brace = True
1836 else:
1837 self.stack.append(_BlockInfo(True))
1838 if _MATCH_ASM.match(line):
1839 self.stack[-1].inline_asm = _BLOCK_ASM
1840 elif token == ';' or token == ')':
1841 # If we haven't seen an opening brace yet, but we already saw
1842 # a semicolon, this is probably a forward declaration. Pop
1843 # the stack for these.
1844 #
1845 # Similarly, if we haven't seen an opening brace yet, but we
1846 # already saw a closing parenthesis, then these are probably
1847 # function arguments with extra "class" or "struct" keywords.
1848 # Also pop these stack for these.
1849 if not self.SeenOpenBrace():
1850 self.stack.pop()
1851 else: # token == '}'
1852 # Perform end of block checks and pop the stack.
1853 if self.stack:
1854 self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
1855 self.stack.pop()
1856 line = matched.group(2)
1857
1858 def InnermostClass(self):
1859 """Get class info on the top of the stack.
1860
1861 Returns:
1862 A _ClassInfo object if we are inside a class, or None otherwise.
1863 """
1864 for i in range(len(self.stack), 0, -1):
1865 classinfo = self.stack[i - 1]
1866 if isinstance(classinfo, _ClassInfo):
1867 return classinfo
1868 return None
1869
1870 def CheckClassFinished(self, filename, error):
erg@google.com4e00b9a2009-01-12 23:05:11 +00001871 """Checks that all classes have been completely parsed.
1872
1873 Call this when all lines in a file have been processed.
1874 Args:
1875 filename: The name of the current file.
1876 error: The function to call with any errors found.
1877 """
erg@google.comd350fe52013-01-14 17:51:48 +00001878 # Note: This test can result in false positives if #ifdef constructs
1879 # get in the way of brace matching. See the testBuildClass test in
1880 # cpplint_unittest.py for an example of this.
1881 for obj in self.stack:
1882 if isinstance(obj, _ClassInfo):
1883 error(filename, obj.starting_linenum, 'build/class', 5,
1884 'Failed to find complete declaration of class %s' %
1885 obj.name)
erg@google.com4e00b9a2009-01-12 23:05:11 +00001886
1887
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  These constructs are accepted by gcc-2 but are not standard C++; flagging
  them in lint eases the move to newer compilers:
  - storage class not first (e.g. "const static" instead of "static const").
  - "%qd" instead of "%lld" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  While here, the function also flags 'const string&' members and
  single-argument constructors that are not marked explicit, since the
  class context needed for that is already available.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported; it is invoked as
           error(filename, linenum, category, confidence, message).
  """

  # This view of the line has comments removed but strings still present.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Escaped backslashes must go first, or "\\%" would look like "\%".
  without_escaped_backslashes = with_strings.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', without_escaped_backslashes):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Everything below works on the line with comments and strings removed.
  code = clean_lines.elided[linenum]

  # Each entry: (matcher, pattern, category, confidence, message).  The
  # entries are evaluated top to bottom, so errors keep a stable order.
  #
  # About the last entry (const string& members):
  # TODO(unknown): Could it be expanded safely to arbitrary references,
  # without triggering too many false positives? The first
  # attempt triggered 5 warnings for mostly benign code in the regtest, hence
  # the restriction.
  # Here's the original regexp, for the reference:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  elided_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard. Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid. Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
  )
  for matcher, pattern, category, confidence, message in elided_checks:
    if matcher(pattern, code):
      error(filename, linenum, category, confidence, message)

  # The remaining check needs a class whose head (up to the opening
  # brace) has been fully parsed.
  classinfo = nesting_state.InnermostClass()
  if not (classinfo and classinfo.seen_open_brace):
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.rpartition('::')[2]
  escaped_classname = re.escape(base_classname)

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  ctor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % escaped_classname,
               code)
  if not ctor:
    return
  sole_argument = ctor.group(1)
  if sole_argument == 'void':
    return
  # An argument that is a (const) reference to the class itself is exempt.
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&' % escaped_classname,
           sole_argument.strip()):
    return
  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
1993
erg@google.com4e00b9a2009-01-12 23:05:11 +00001994
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # if/for/while/switch have their own, more liberal spacing conventions.
  # When the line is one of those, only the text inside its parentheses is
  # examined for function calls; otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    control = Search(control_pattern, line)
    if control:
      fncall = control.group(1)
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )"). We make an exception
  # for nested parens ( (a+b) + c ). Likewise, there should never be
  # a space before a ( when it's a function argument. I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro. Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.

  # Ignore control structures.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch)\b', fncall):
    return
  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  space_before_paren = (
      Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall) and
      not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall))
  if space_before_paren:
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if not Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    return
  # If the closing parenthesis is preceded by only whitespaces,
  # try to give a more descriptive error message.
  if Search(r'^\s+\)', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Closing ) should be moved to the previous line')
  else:
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002061
2062
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  is made up solely of whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2076
2077
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles exactly one line and does one of three things:
  starts a new function (function_state.Begin), ends the current one
  (function_state.Check then function_state.End), or counts the line
  (function_state.Count).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # range (not xrange) keeps this loop working on Python 3 as well; the
    # loop normally breaks after a handful of lines.
    for start_linenum in range(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2146
2147
# Groups: (1) whitespace between '//' and TODO, (2) optional '(username)',
# (3) optional single whitespace character (or end of text) after the
# optional ':'.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Comments that do not start with a TODO marker are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if todo is None:
    return

  spaces_before, username, spaces_after = todo.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(spaces_before) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not username:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Only a single space or end-of-comment may follow.  The group is None
  # when some other character comes directly after the TODO, which is
  # reported as well.
  if spaces_after not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2179
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  An error is reported when one of the DISALLOW* macros starts the line
  while the innermost block is a class whose current access level is not
  'private'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  code = clean_lines.elided[linenum]  # get rid of comments and strings

  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), code)
  if not macro:
    return

  # A DISALLOW* macro outside a class declaration (or inside a function
  # when it should have been part of the class declaration) is left alone:
  # we could issue a warning, but it probably resulted in a compiler error
  # already.
  blocks = nesting_state.stack
  if not blocks:
    return
  innermost = blocks[-1]
  if not isinstance(innermost, _ClassInfo):
    return

  if innermost.access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2209
2210
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Find the corresponding > to close a template.

  Scans forward from init_suffix, continuing onto following elided lines,
  to decide whether an already seen '<' opens a template argument list or
  is a less-than operator.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if a matching bracket exists, if a ',' is met while a '<' is the
    innermost open bracket, or if the end of the file is reached without a
    verdict.  False if ')', ']' or ';' is met while a '<' is innermost.
  """
  rest = init_suffix
  open_brackets = ['<']
  openers = ('<', '(', '[')
  while True:
    # Find the next operator that can tell us whether < is used as an
    # opening bracket or as a less-than operator. We only want to
    # warn on the latter case.
    #
    # We could also check all other operators and terminate the search
    # early, e.g. if we got something like this "a<b+c", the "<" is
    # most likely a less-than operator, but then we will get false
    # positives for default arguments and other template expressions.
    found = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', rest)
    if not found:
      # Nothing of interest left on this line; scan the next line.
      linenum += 1
      if linenum >= len(clean_lines.elided):
        # Exhausted all remaining lines and still no matching angle bracket.
        # Most likely the input was incomplete, otherwise we should have
        # seen a semicolon and returned early.
        return True
      rest = clean_lines.elided[linenum]
      continue

    operator = found.group(1)
    rest = found.group(2)

    if operator in openers:
      # Any opening bracket nests one level deeper, whatever is on top.
      open_brackets.append(operator)
    elif open_brackets[-1] == '<':
      # Expecting closing angle bracket
      if operator == '>':
        open_brackets.pop()
        if not open_brackets:
          # Found matching angle bracket
          return True
      elif operator == ',':
        # Got a comma after a bracket, this is most likely a template
        # argument. We have not seen a closing angle bracket yet, but
        # it's probably a few lines later if we look for it, so just
        # return early here.
        return True
      else:
        # Got some other operator.
        return False
    elif operator in (')', ']'):
      # Expecting closing parenthesis or closing bracket.  We don't bother
      # checking for matching () or []. If we got something like (] or [),
      # it would have been a syntax error.
      open_brackets.pop()
2278
2279
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Find the corresponding < that started a template.

  Scans backward from init_prefix, continuing onto preceding elided lines,
  to decide whether an already seen '>' closes a template argument list.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if a matching bracket exists, or if a ',' is met while a '>' is the
    innermost pending bracket.  False if '(', '[' or ';' is met while a '>'
    is innermost, or if the start of the file is reached without a verdict.
  """
  rest = init_prefix
  pending = ['>']
  closers = ('>', ')', ']')
  while True:
    # Find the previous operator
    found = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', rest)
    if not found:
      # Nothing of interest left on this line; scan the previous line.
      linenum -= 1
      if linenum < 0:
        # Exhausted all earlier lines and still no matching angle bracket.
        return False
      rest = clean_lines.elided[linenum]
      continue

    operator = found.group(2)
    rest = found.group(1)

    if operator in closers:
      # Walking backwards, a closing bracket nests one level deeper.
      pending.append(operator)
    elif pending[-1] == '>':
      # Expecting opening angle bracket
      if operator == '<':
        pending.pop()
        if not pending:
          # Found matching angle bracket
          return True
      elif operator == ',':
        # Got a comma before a bracket, this is most likely a
        # template argument. The opening angle bracket is probably
        # there if we look for it, so just return early here.
        return True
      else:
        # Got some other operator.
        return False
    elif operator in ('(', '['):
      # Expecting opening parenthesis or opening bracket
      pending.pop()
2335
2336
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (i.e., right before a line
  # like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The slice is 5 characters wide, so it has to be compared with a
        # 5 character literal: 4 spaces of indentation followed by the colon.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block. Is this needed?')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block. Is this needed?')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  #
  # This does not apply when the non-space character following the
  # comma is another comma, since the only time when that happens is
  # for empty macro arguments.
  if Search(r',[^,\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  if Search(r'[^ ({]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
erg@google.com4e00b9a2009-01-12 23:05:11 +00002629
2630
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is that public/protected/private
  is preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  class_start = class_info.starting_linenum

  # Small classes (25 lines or less, roughly one terminal screen) are
  # exempt.  If the end of the class was never found, last_line is zero
  # and this same condition skips the check.
  #
  # Lines at or before the start of the class are exempt as well, which
  # covers one-liners such as
  #   class Foo { public: ... };
  if class_info.last_line - class_start <= 24 or linenum <= class_start:
    return

  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not matched:
    return

  # Nothing to report if the previous line is blank, mentions "class" or
  # "struct" (we are at the beginning of the class, or forward-declaring
  # an inner class), or ends with a backslash (class defined in a macro).
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # Try a bit harder to find the end of the class head, to account for
  # multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  end_class_head = next(
      (i for i in range(class_start, linenum)
       if Search(r'\{\s*$', clean_lines.lines[i])),
      class_start)
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
2684
2685
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple for the nearest non-blank line in
    clean_lines.elided before linenum.  If every earlier line is blank (or
    there is no earlier line), returns ('', -1).
  """
  candidate = linenum
  while candidate > 0:
    # Step up one line, then test it.
    candidate -= 1
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  return ('', -1)
2707
2708
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Reports an opening brace alone on a line, an else that is not on the same
  line as the preceding }, braces on only one side of an else, else/do
  bodies on the same line as the keyword, and a redundant ; after a }.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables.  We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ',', ';', ':', '{', or '}', or if the
    # previous line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  #
  # While the accumulated text still looks like "  {...};", prepend earlier
  # non-blank lines until one of them contains a semicolon.
  prevlinenum = linenum
  while Match(r'\s+{.*}\s*;', line):
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # GetPreviousNonBlankLine returns ('', -1) once there are no earlier
    # lines.  Stop there: prepending '' leaves line unchanged, so without
    # this check the loop would never terminate.
    if prevlinenum < 0 or prevline.count(';'):
      break
    line = prevline + line
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|union|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
2783
2784
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flag loops and conditionals whose body is just a semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Only lines that begin (after optional whitespace) with a loop keyword
  # or "if" are considered.  Requiring the keyword at the start of the line
  # also ignores most do-while loops, since those lines should start with a
  # closing brace.  "if" is included because an empty conditional block is
  # likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return

  # Locate the end of the parenthesized condition.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Warn only when a semicolon immediately follows the condition.
  # Whitespace or a newline before the semicolon is handled by a separate
  # check for semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if matched.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
erg@google.com4e00b9a2009-01-12 23:05:11 +00002818
2819
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  Suggests the comparison-specific macro when the macro argument is a single
  relational expression with a constant literal on at least one side.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested
  lines = clean_lines.elided
  current = lines[linenum]
  check_macro = None
  start_pos = -1
  for macro in _CHECK_MACROS:
    if macro not in current:
      continue
    # Find opening parenthesis.  Do a regular expression match here
    # to make sure that we are matching the expected CHECK macro, as
    # opposed to some other macro that happens to contain the CHECK
    # substring.
    head = Match(r'^(.*\b' + macro + r'\s*)\(', current)
    if head:
      check_macro = macro
      start_pos = len(head.group(1))
      break
  if not check_macro:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return
  if linenum == end_line:
    expression = current[start_pos + 1:end_pos - 1]
  else:
    # The expression spans several lines: the rest of the first line, all
    # lines in between, and the last line up to the closing parenthesis.
    pieces = [current[start_pos + 1:]]
    pieces.extend(lines[linenum + 1:end_line])
    pieces.append(last_line[:end_pos - 1])
    expression = ''.join(pieces)

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not matched:
      # Unparenthesized operand.  Consume as many plain characters as
      # possible in one go, falling back to a single non-space character
      # (with any leading whitespace) when the next character could start
      # an operator.
      operand = (Match(r'^([^-=!<>()&|]+)(.*)$', expression) or
                 Match(r'^(\s*\S)(.*)$', expression))
      if not operand:
        break
      lhs += operand.group(1)
      expression = operand.group(2)
      continue

    token = matched.group(1)
    rest = matched.group(2)
    if token == '(':
      # Parenthesized operand
      end = FindEndOfExpressionInLine(rest, 0, 1, '(', ')')
      if end < 0:
        return  # Unmatched parenthesis
      lhs += '(' + rest[0:end]
      expression = rest[end:]
    elif token in ('&&', '||'):
      # Logical and/or operators.  This means the expression
      # contains more than one term, for example:
      #   CHECK(42 < a && a < b);
      #
      # These are not replaceable with CHECK_LE, so bail out early.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Non-relational operator
      lhs += token
      expression = rest
    else:
      # Relational operator
      operator = token
      rhs = rest
      break

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and operator and rhs):
    return

  # Check that rhs do not contain logical operators.  We already know
  # that lhs is fine since the loop above parses out && and ||.
  if '&&' in rhs or '||' in rhs:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][operator],
              check_macro, operator))
erg@google.com4e00b9a2009-01-12 23:05:11 +00002945
2946
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN on the elided line is
  reported together with the operator listed for it in
  _ALT_TOKEN_REPLACEMENT.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Preprocessor lines are not checked.
  if Match(r'^\s*#', text):
    return

  # Last ditch effort to avoid multi-line comments: skip any line that
  # contains a comment delimiter.  This does not help when the comment
  # started before or ended after the current line, but it catches most of
  # the false positives and gives people who use multi-line comments in
  # preprocessor macros a way to work around this warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in text or '*/' in text:
    return

  for hit in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(text):
    alt_token = hit.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
2977
2978
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-Unicode input (a byte
    string) is measured with len().
  """
  # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this test
  # does not rely on the `unicode` builtin, which only exists on Python 2.
  if not isinstance(line, type(u'')):
    return len(line)

  width = 0
  # Normalize to NFC first so that a base character followed by a combining
  # mark is counted as the single composed character where one exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters that remain after normalization add no width.
      width += 1
  return width
2999
3000
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  raw_line = clean_lines.raw_lines[linenum]

  if '\t' in raw_line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  elided_line = clean_lines.elided[linenum]
  # Number of leading space characters (tabs are not counted).
  indent_width = len(raw_line) - len(raw_line.lstrip(' '))
  if raw_line and raw_line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section
  # labels (a lone "word:" on the elided line).
  elif (indent_width in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', elided_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    guard_prefixes = tuple(
        template % cppvar
        for template in ('#ifndef %s', '#define %s', '#endif // %s'))
    is_header_guard = raw_line.startswith(guard_prefixes)

  # #include lines and header guards can be long, since there's no clean way
  # to split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developer's fault.
  length_exempt = (
      raw_line.startswith('#include') or is_header_guard or
      Match(r'^\s*//.*http(s?)://\S*$', raw_line) or
      Match(r'^// \$Id:.*#[0-9]+ \$$', raw_line))
  if not length_exempt:
    line_width = GetLineWidth(raw_line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  # for loops are allowed two ;'s (and may run over two lines).
  if elided_line.count(';') > 1 and 'for' not in elided_line:
    previous_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    continues_for_header = ('for' in previous_line and
                            ';' not in previous_line)
    # It's ok to have many commands in a switch case that fits in 1 line.
    one_line_case = (('case ' in elided_line or
                      'default:' in elided_line) and
                     'break;' in elided_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  innermost_class = nesting_state.InnermostClass()
  if innermost_class:
    CheckSectionSpacing(filename, clean_lines, innermost_class, linenum,
                        error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003101
3102
# A quoted #include of a .h file whose path has no '/' in it.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Any #include line.  Group 1 is the opening delimiter (< or ") and group 2
# is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3111
3112
def _DropCommonSuffixes(filename):
  """Drops common suffixes like _test.cc or -inl.h from filename.

  A suffix is only removed when it is preceded by '-' or '_'; that separator
  is removed as well.  When no such suffix applies, only the file extension
  is removed.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  known_suffixes = ('test.cc', 'regtest.cc', 'unittest.cc',
                    'inl.h', 'impl.h', 'internal.h')
  for tail in known_suffixes:
    # Length of the suffix plus the one separator character before it.
    cut = len(tail) + 1
    if (len(filename) >= cut and filename.endswith(tail) and
        filename[-cut] in ('-', '_')):
      return filename[:-cut]
  root, _ = os.path.splitext(filename)
  return root
3138
3139
def _IsTestFilename(filename):
  """Determines if the given filename has a suffix that identifies it as a test.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' ends with '_test.cc', '_unittest.cc' or '_regtest.cc',
    False otherwise.
  """
  return filename.endswith(('_test.cc', '_unittest.cc', '_regtest.cc'))
3155
3156
def _ClassifyInclude(fileinfo, include, is_system):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers; membership in _CPP_HEADERS
  # separates the C++ ones from the C ones.
  if is_system:
    return _CPP_SYS_HEADER if include in _CPP_HEADERS else _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  # A sibling "public" directory of the target's directory also counts.
  target_dir, target_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3214
3215
erg@google.coma87abb82009-02-24 01:41:01 +00003216
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if include_match:
    delimiter, include = include_match.group(1, 2)
    is_system = (delimiter == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      # Remember where this header was first included.
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      header_type = _ClassifyInclude(fileinfo, include, is_system)
      order_problem = include_state.CheckNextIncludeOrder(header_type)
      if order_problem:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (order_problem, fileinfo.BaseName()))
      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
      in_alpha_order = include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical_include)
      if not in_alpha_order:
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)
      include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  stream_match = _RE_PATTERN_INCLUDE.match(line)
  if (stream_match and
      Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$',
            stream_match.group(2)) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3287
erg@google.com8a95ecc2011-09-08 00:45:54 +00003288
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the
  text following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly
  handles nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match string having an open punctuation symbol at the
  end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
          It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.  It is searched with re.M.
  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be
    found, or if a closing punctuation does not match the innermost open one.

  Raises:
    AssertionError: if the match of start_pattern does not end with an
      opening punctuation symbol.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # values() rather than the Python-2-only itervalues(), so this runs on
  # both Python 2 and Python 3.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    char = text[position]
    if char == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif char in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif char in matching_punctuation:
      punctuation_stack.append(matching_punctuation[char])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # punctuations match; position is one past the final closing symbol.
  return text[start_position:position - 1]
3343
3344
# Patterns for matching call-by-reference parameters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'[\w:]*\w(?:\s*<[\w:*, ]*>(?:::\w+)?)?')
# A call-by-reference parameter ends with '& identifier'.  Group 1 captures
# the type together with the identifier; an optional default value and the
# terminating ',' or ')' follow the group.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(%s(?:\s*(?:\bconst\b|[*]))*\s*&\s*%s)\s*(?:=[^,()]+)?[,)]'
    % (_RE_PATTERN_TYPE, _RE_PATTERN_IDENT))
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*%(ident)s|const\s+%(type)s\s*&\s*%(ident)s)'
    % {'ident': _RE_PATTERN_IDENT, 'type': _RE_PATTERN_TYPE})
3359
3360
3361def CheckLanguage(filename, clean_lines, linenum, file_extension,
3362 include_state, nesting_state, error):
erg@google.come35f7652009-06-19 20:52:09 +00003363 """Checks rules from the 'C++ language rules' section of cppguide.html.
3364
3365 Some of these rules are hard to test (function overloading, using
3366 uint32 inappropriately), but we do the best we can.
3367
3368 Args:
3369 filename: The name of the current file.
3370 clean_lines: A CleansedLines instance containing the file.
3371 linenum: The number of the line to check.
3372 file_extension: The extension (without the dot) of the filename.
3373 include_state: An _IncludeState instance in which the headers are inserted.
erg@google.comfd5da632013-10-25 17:39:45 +00003374 nesting_state: A _NestingState instance which maintains information about
3375 the current stack of nested blocks being parsed.
erg@google.come35f7652009-06-19 20:52:09 +00003376 error: The function to call with any errors found.
3377 """
erg@google.com4e00b9a2009-01-12 23:05:11 +00003378 # If the line is empty or consists of entirely a comment, no need to
3379 # check it.
3380 line = clean_lines.elided[linenum]
3381 if not line:
3382 return
3383
erg@google.come35f7652009-06-19 20:52:09 +00003384 match = _RE_PATTERN_INCLUDE.search(line)
3385 if match:
3386 CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
3387 return
3388
erg@google.com4e00b9a2009-01-12 23:05:11 +00003389 # Make Windows paths like Unix.
3390 fullname = os.path.abspath(filename).replace('\\', '/')
3391
3392 # TODO(unknown): figure out if they're using default arguments in fn proto.
3393
erg@google.com4e00b9a2009-01-12 23:05:11 +00003394 # Check to see if they're using an conversion function cast.
3395 # I just try to capture the most common basic types, though there are more.
3396 # Parameterless conversion functions, such as bool(), are allowed as they are
3397 # probably a member operator declaration or default constructor.
3398 match = Search(
erg@google.coma868d2d2009-10-09 21:18:45 +00003399 r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
erg@google.comc6671232013-10-25 21:44:03 +00003400 r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
3401 r'(\([^)].*)', line)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003402 if match:
erg@google.comc6671232013-10-25 21:44:03 +00003403 matched_new = match.group(1)
3404 matched_type = match.group(2)
3405 matched_funcptr = match.group(3)
3406
erg@google.com4e00b9a2009-01-12 23:05:11 +00003407 # gMock methods are defined using some variant of MOCK_METHODx(name, type)
3408 # where type may be float(), int(string), etc. Without context they are
erg@google.comd7d27472011-09-07 17:36:35 +00003409 # virtually indistinguishable from int(x) casts. Likewise, gMock's
3410 # MockCallback takes a template parameter of the form return_type(arg_type),
3411 # which looks much like the cast we're trying to detect.
erg@google.comc6671232013-10-25 21:44:03 +00003412 #
3413 # std::function<> wrapper has a similar problem.
3414 #
3415 # Return types for function pointers also look like casts if they
3416 # don't have an extra space.
3417 if (matched_new is None and # If new operator, then this isn't a cast
erg@google.comd7d27472011-09-07 17:36:35 +00003418 not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
erg@google.comc6671232013-10-25 21:44:03 +00003419 Search(r'\bMockCallback<.*>', line) or
3420 Search(r'\bstd::function<.*>', line)) and
3421 not (matched_funcptr and
3422 Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
3423 matched_funcptr))):
erg@google.comd350fe52013-01-14 17:51:48 +00003424 # Try a bit harder to catch gmock lines: the only place where
3425 # something looks like an old-style cast is where we declare the
3426 # return type of the mocked method, and the only time when we
3427 # are missing context is if MOCK_METHOD was split across
erg@google.comc6671232013-10-25 21:44:03 +00003428 # multiple lines. The missing MOCK_METHOD is usually one or two
3429 # lines back, so scan back one or two lines.
3430 #
3431 # It's not possible for gmock macros to appear in the first 2
3432 # lines, since the class head + section name takes up 2 lines.
3433 if (linenum < 2 or
3434 not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
3435 clean_lines.elided[linenum - 1]) or
3436 Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
3437 clean_lines.elided[linenum - 2]))):
erg@google.comd350fe52013-01-14 17:51:48 +00003438 error(filename, linenum, 'readability/casting', 4,
3439 'Using deprecated casting style. '
3440 'Use static_cast<%s>(...) instead' %
erg@google.comc6671232013-10-25 21:44:03 +00003441 matched_type)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003442
3443 CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3444 'static_cast',
erg@google.com8a95ecc2011-09-08 00:45:54 +00003445 r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
3446
3447 # This doesn't catch all cases. Consider (const char * const)"hello".
3448 #
3449 # (char *) "foo" should always be a const_cast (reinterpret_cast won't
3450 # compile).
3451 if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3452 'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
3453 pass
3454 else:
3455 # Check pointer casts for other than string constants
3456 CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3457 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00003458
3459 # In addition, we look for people taking the address of a cast. This
3460 # is dangerous -- casts can assign to temporaries, so the pointer doesn't
3461 # point where you think.
erg@google.comc6671232013-10-25 21:44:03 +00003462 match = Search(
3463 r'(?:&\(([^)]+)\)[\w(])|'
3464 r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
3465 if match and match.group(1) != '*':
erg@google.com4e00b9a2009-01-12 23:05:11 +00003466 error(filename, linenum, 'runtime/casting', 4,
3467 ('Are you taking an address of a cast? '
3468 'This is dangerous: could be a temp var. '
3469 'Take the address before doing the cast, rather than after'))
3470
erg@google.comc6671232013-10-25 21:44:03 +00003471 # Create an extended_line, which is the concatenation of the current and
3472 # next lines, for more effective checking of code that may span more than one
3473 # line.
3474 if linenum + 1 < clean_lines.NumLines():
3475 extended_line = line + clean_lines.elided[linenum + 1]
3476 else:
3477 extended_line = line
3478
erg@google.com4e00b9a2009-01-12 23:05:11 +00003479 # Check for people declaring static/global STL strings at the top level.
3480 # This is dangerous because the C++ language does not guarantee that
3481 # globals with constructors are initialized before the first access.
3482 match = Match(
3483 r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
3484 line)
3485 # Make sure it's not a function.
3486 # Function template specialization looks like: "string foo<Type>(...".
3487 # Class template definitions look like: "string Foo<Type>::Method(...".
3488 if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
3489 match.group(3)):
3490 error(filename, linenum, 'runtime/string', 4,
3491 'For a static/global string constant, use a C style string instead: '
3492 '"%schar %s[]".' %
3493 (match.group(1), match.group(2)))
3494
erg@google.com4e00b9a2009-01-12 23:05:11 +00003495 if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
3496 error(filename, linenum, 'runtime/init', 4,
3497 'You seem to be initializing a member variable with itself.')
3498
3499 if file_extension == 'h':
3500 # TODO(unknown): check that 1-arg constructors are explicit.
3501 # How to tell it's a constructor?
3502 # (handled in CheckForNonStandardConstructs for now)
3503 # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
3504 # (level 1 error)
3505 pass
3506
3507 # Check if people are using the verboten C basic types. The only exception
3508 # we regularly allow is "unsigned short port" for port.
3509 if Search(r'\bshort port\b', line):
3510 if not Search(r'\bunsigned short port\b', line):
3511 error(filename, linenum, 'runtime/int', 4,
3512 'Use "unsigned short" for ports, not "short"')
3513 else:
3514 match = Search(r'\b(short|long(?! +double)|long long)\b', line)
3515 if match:
3516 error(filename, linenum, 'runtime/int', 4,
3517 'Use int16/int64/etc, rather than the C type %s' % match.group(1))
3518
3519 # When snprintf is used, the second argument shouldn't be a literal.
3520 match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
erg+personal@google.com05189642010-04-30 20:43:03 +00003521 if match and match.group(2) != '0':
3522 # If 2nd arg is zero, snprintf is used to calculate size.
erg@google.com4e00b9a2009-01-12 23:05:11 +00003523 error(filename, linenum, 'runtime/printf', 3,
3524 'If you can, use sizeof(%s) instead of %s as the 2nd arg '
3525 'to snprintf.' % (match.group(1), match.group(2)))
3526
3527 # Check if some verboten C functions are being used.
3528 if Search(r'\bsprintf\b', line):
3529 error(filename, linenum, 'runtime/printf', 5,
3530 'Never use sprintf. Use snprintf instead.')
3531 match = Search(r'\b(strcpy|strcat)\b', line)
3532 if match:
3533 error(filename, linenum, 'runtime/printf', 4,
3534 'Almost always, snprintf is better than %s' % match.group(1))
3535
erg@google.coma868d2d2009-10-09 21:18:45 +00003536 # Check if some verboten operator overloading is going on
3537 # TODO(unknown): catch out-of-line unary operator&:
3538 # class X {};
3539 # int operator&(const X& x) { return 42; } // unary operator&
3540 # The trick is it's hard to tell apart from binary operator&:
3541 # class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
3542 if Search(r'\boperator\s*&\s*\(\s*\)', line):
3543 error(filename, linenum, 'runtime/operator', 4,
3544 'Unary operator& is dangerous. Do not use it.')
3545
erg@google.com4e00b9a2009-01-12 23:05:11 +00003546 # Check for suspicious usage of "if" like
3547 # } if (a == b) {
3548 if Search(r'\}\s*if\s*\(', line):
3549 error(filename, linenum, 'readability/braces', 4,
3550 'Did you mean "else if"? If not, start a new line for "if".')
3551
3552 # Check for potential format string bugs like printf(foo).
3553 # We constrain the pattern not to pick things like DocidForPrintf(foo).
3554 # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
erg@google.com8a95ecc2011-09-08 00:45:54 +00003555 # TODO(sugawarayu): Catch the following case. Need to change the calling
3556 # convention of the whole function to process multiple line to handle it.
3557 # printf(
3558 # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
3559 printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
3560 if printf_args:
3561 match = Match(r'([\w.\->()]+)$', printf_args)
erg@google.comd350fe52013-01-14 17:51:48 +00003562 if match and match.group(1) != '__VA_ARGS__':
erg@google.com8a95ecc2011-09-08 00:45:54 +00003563 function_name = re.search(r'\b((?:string)?printf)\s*\(',
3564 line, re.I).group(1)
3565 error(filename, linenum, 'runtime/printf', 4,
3566 'Potential format string bug. Do %s("%%s", %s) instead.'
3567 % (function_name, match.group(1)))
erg@google.com4e00b9a2009-01-12 23:05:11 +00003568
3569 # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
3570 match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
3571 if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
3572 error(filename, linenum, 'runtime/memset', 4,
3573 'Did you mean "memset(%s, 0, %s)"?'
3574 % (match.group(1), match.group(2)))
3575
3576 if Search(r'\busing namespace\b', line):
3577 error(filename, linenum, 'build/namespaces', 5,
3578 'Do not use namespace using-directives. '
3579 'Use using-declarations instead.')
3580
3581 # Detect variable-length arrays.
3582 match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
3583 if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
3584 match.group(3).find(']') == -1):
3585 # Split the size using space and arithmetic operators as delimiters.
3586 # If any of the resulting tokens are not compile time constants then
3587 # report the error.
3588 tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
3589 is_const = True
3590 skip_next = False
3591 for tok in tokens:
3592 if skip_next:
3593 skip_next = False
3594 continue
3595
3596 if Search(r'sizeof\(.+\)', tok): continue
3597 if Search(r'arraysize\(\w+\)', tok): continue
3598
3599 tok = tok.lstrip('(')
3600 tok = tok.rstrip(')')
3601 if not tok: continue
3602 if Match(r'\d+', tok): continue
3603 if Match(r'0[xX][0-9a-fA-F]+', tok): continue
3604 if Match(r'k[A-Z0-9]\w*', tok): continue
3605 if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
3606 if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
3607 # A catch all for tricky sizeof cases, including 'sizeof expression',
3608 # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
erg@google.com8a95ecc2011-09-08 00:45:54 +00003609 # requires skipping the next token because we split on ' ' and '*'.
erg@google.com4e00b9a2009-01-12 23:05:11 +00003610 if tok.startswith('sizeof'):
3611 skip_next = True
3612 continue
3613 is_const = False
3614 break
3615 if not is_const:
3616 error(filename, linenum, 'runtime/arrays', 1,
3617 'Do not use variable-length arrays. Use an appropriately named '
3618 "('k' followed by CamelCase) compile-time constant for the size.")
3619
3620 # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
3621 # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
3622 # in the class declaration.
3623 match = Match(
3624 (r'\s*'
3625 r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
3626 r'\(.*\);$'),
3627 line)
3628 if match and linenum + 1 < clean_lines.NumLines():
3629 next_line = clean_lines.elided[linenum + 1]
erg@google.com8a95ecc2011-09-08 00:45:54 +00003630 # We allow some, but not all, declarations of variables to be present
3631 # in the statement that defines the class. The [\w\*,\s]* fragment of
3632 # the regular expression below allows users to declare instances of
3633 # the class or pointers to instances, but not less common types such
3634 # as function pointers or arrays. It's a tradeoff between allowing
3635 # reasonable code and avoiding trying to parse more C++ using regexps.
3636 if not Search(r'^\s*}[\w\*,\s]*;', next_line):
erg@google.com4e00b9a2009-01-12 23:05:11 +00003637 error(filename, linenum, 'readability/constructors', 3,
3638 match.group(1) + ' should be the last thing in the class')
3639
3640 # Check for use of unnamed namespaces in header files. Registration
3641 # macros are typically OK, so we allow use of "namespace {" on lines
3642 # that end with backslashes.
3643 if (file_extension == 'h'
3644 and Search(r'\bnamespace\s*{', line)
3645 and line[-1] != '\\'):
3646 error(filename, linenum, 'build/namespaces', 4,
3647 'Do not use unnamed namespaces in header files. See '
3648 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
3649 ' for more information.')
3650
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Warn about function parameters that look like non-const references.

  This lives apart from CheckLanguage because it consults the lines that
  precede the current one rather than scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  if '&' not in line:
    # Without an '&' there is nothing that could be a reference.
    return

  # A long type name can be wrapped across two lines, with the break either
  # just before or just after a '::'.  When that is detected, glue the tail
  # of the previous line onto this one so the const-reference pattern sees
  # the whole type.
  #
  # Only one line of look-back is done; walking further would be expensive.
  # Types spanning more than two lines should be given a typedef instead.
  if linenum > 1:
    line_above = clean_lines.elided[linenum - 1]
    type_head = None
    if Match(r'\s*::(?:\w|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      type_head = Search(r'\b((?:const\s*)?(?:\w|::)+\w)\s*$', line_above)
    elif Match(r'\s*[a-zA-Z_](\w|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      type_head = Search(r'\b((?:const\s*)?(?:\w|::)+::)\s*$', line_above)
    if type_head:
      line = type_head.group(1) + line.lstrip()

  # A lone '&' can be:
  #   - binary bitwise AND inside an expression,
  #   - unary address-of inside an expression,
  #   - a reference declarator in a parameter list.
  # The first two are excluded by requiring that we are not inside a
  # function body, including a body that the trailing '{' on this very line
  # has just opened.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  scopes = nesting_state.stack
  declaration_scopes = (_ClassInfo, _NamespaceInfo)
  if not scopes:
    in_declaration_context = True  # top level
  elif isinstance(scopes[-1], declaration_scopes):
    in_declaration_context = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    # The innermost block was just opened on this line; what matters is the
    # block that encloses it (if any).
    in_declaration_context = (len(scopes) == 1 or
                              isinstance(scopes[-2], declaration_scopes))
  else:
    in_declaration_context = False
  if not in_declaration_context:
    return

  # Non-const references are tolerated in a few well-known places, such as
  # functions named "swap()" and the iostream operators "<<" and ">>".
  #
  # '&' is also accepted inside static_assert, which looks like a function
  # call but is really a declaration expression.
  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    return
  if not Search(r'\S+\([^)]*$', line):
    # No function name with a still-open parameter list on this line, so
    # this is probably the continuation of a multi-line parameter list.
    # Look for a whitelisted function on the one or two lines above.
    if any(linenum >= back and
           Search(whitelisted_functions, clean_lines.elided[linenum - back])
           for back in (1, 2)):
      return

  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
    if Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
      continue
    error(filename, linenum, 'runtime/references', 2,
          'Is this a non-const reference? '
          'If so, make const or use a pointer: ' + parameter)
3741
erg@google.com4e00b9a2009-01-12 23:05:11 +00003742
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching the given pattern and reports it.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # Everything up to one character before group 1 (presumably the text that
  # precedes the opening parenthesis of the "(type)" candidate).
  leading_text = line[0:match.start(1) - 1]

  # sizeof(type) looks like a cast but is not one.
  if Match(r'.*sizeof\s*$', leading_text):
    return False

  # operator++(int) and operator--(int)
  if leading_text.endswith((' operator++', ' operator--')):
    return False

  remainder = line[match.end(0):]
  if not Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
    # Nothing suggests a declaration, so this should be an actual cast.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast. Use %s<%s>(...) instead' %
          (cast_type, match.group(1)))
    return True

  # From here on the match looks like a function with a single unnamed
  # argument rather than a cast.  Instead of a deprecated-cast warning,
  # issue an unnamed-parameter warning where that is appropriate.
  #
  # The style guide requires every parameter to be named, so these should
  # be reported:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are function-like too, but names are commonly left out to reduce
  # clutter, so they are not reported:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;

  # Any kind of template argument is fine.
  if Match(r'^\s*>', remainder):
    return False

  # Assignments to function pointers are fine, but pure virtual functions
  # ("= 0") with unnamed parameters still get a warning.  Assigning "0" to
  # a function pointer is therefore reported too; "nullptr" or "NULL" are
  # the preferred spellings there anyway.
  assigned = Match(r'^\s=\s*(\S+)\s*;', remainder)
  if assigned and assigned.group(1) != '0':
    return False

  # Function pointer declarations are fine; they are recognised by a ')'
  # immediately before the "(type)" text.
  if Match(r'.*\)\s*$', line[0:match.start(0)]):
    return False

  # A parameter named through a block comment is fine, e.g.:
  #   Function(int /*unused_param*/);
  if '/*' in raw_line:
    return False

  # Passed all filters, issue warning here.
  error(filename, linenum, 'readability/function', 3,
        'All parameters should be named in a function')
  return True
3835
erg@google.com4e00b9a2009-01-12 23:05:11 +00003836
# Table of (header, (template names...)) pairs.  Each entry lists template
# names whose use is taken as a reason to #include the given header; the
# table is expanded into compiled regular expressions in
# _re_pattern_templates.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )
3873
# Matches the bare word 'string'; std::string is handled separately from the
# template tables because it is a non-templatized type.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, name, header) triples for free functions that
# live in <algorithm>.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  # Note that the pattern needs one character (other than '>' or '.') in
  # front of the name, and a character other than ')' right after the '(',
  # so a call with an empty argument list is never matched.
  _re_pattern_algorithm_header.append(
      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       _template,
       '<algorithm>'))

# List of (compiled pattern, 'name<>', header) triples, one per template
# name in _HEADERS_CONTAINING_TEMPLATES.  A pattern matches the name
# followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append(
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header))
3893
3894
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a header are part of the same module.

  A 'module' here means the following:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  When filename_cc carries a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h', the leading
  part of the .cc path that the header lacks is returned as well.  Callers
  use that prefix to open the header more robustly; the real include paths
  are not available in this context, so this guesswork is needed.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not filename_cc.endswith('.cc') or not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the source path to its module stem: drop the extension, at most
  # one test suffix, and the public/internal directory components.
  cc_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if cc_stem.endswith(test_suffix):
      cc_stem = cc_stem[:-len(test_suffix)]
      break
  cc_stem = cc_stem.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header; its only optional suffix is '-inl'.
  h_stem = filename_h[:-len('.h')]
  if h_stem.endswith('-inl'):
    h_stem = h_stem[:-len('-inl')]
  h_stem = h_stem.replace('/public/', '/').replace('/internal/', '/')

  if not cc_stem.endswith(h_stem):
    return False, ''
  # Whatever precedes the shared tail is the prefix the header is missing.
  return True, cc_stem[:-len(h_stem)]
3948
3949
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the header is stripped of comments and searched for an
  include directive.  Includes that are not yet keys of include_state are
  added; existing entries are left untouched.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if a header was successfully added. False otherwise.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for linenum, line in enumerate(headerfile, 1):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the handle even if a line fails to process.  An injected io
    # factory may hand back any iterable of lines (for instance a plain
    # list), so only call close() when the object provides it.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
3977
3978
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in range(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every template pattern needs a '<', so lines without one are skipped.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # UpdateIncludeState inserts into include_state while we loop, so iterate
  # over an explicit snapshot of the keys.  list() guarantees a real copy
  # even where keys() returns a live view of the dictionary.
  header_keys = list(include_state.keys())
  for header in header_keys:
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    template = required[required_header_unstripped][1]
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, required[required_header_unstripped][0],
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
4070
4071
# Matches make_pair immediately followed by an explicit template argument
# list, e.g. 'make_pair<int, int>(...)' or 'make_pair <'.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flag calls to make_pair that spell out the template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Note that the raw (uncleansed) text of the line is what gets searched.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  raw_line = clean_lines.raw_lines[linenum]
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(raw_line):
    return
  error(filename, linenum, 'build/explicit_make_pair',
        4,  # 4 = high confidence
        'For C++11-compatibility, omit template arguments from make_pair'
        ' OR use pair directly OR if appropriate, construct a pair directly')
erg@google.com8a95ecc2011-09-08 00:45:54 +00004095
4096
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: An array of strings, each representing a line of the file,
                 with comments stripped.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  # Lines inside an inline assembly block are not checked any further.
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # The default is None rather than a shared mutable list; treat it (and
  # any other empty value) as "no extra checks".
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
erg@google.com4e00b9a2009-01-12 23:05:11 +00004136
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # The default is None rather than a shared mutable list; hand ProcessLine
  # a real (possibly empty) sequence either way.
  extra_check_functions = extra_check_functions or []

  # Builds a new list, so the caller's list is left untouched.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in range(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckClassFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
4181
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source.read().split('\n')
      finally:
        # Close the file explicitly instead of leaving the handle to the
        # garbage collector.
        source.close()

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Remove trailing '\r'.
  carriage_return_found = False
  for index, text in enumerate(lines):
    if text.endswith('\r'):
      lines[index] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in ('cc', 'h', 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    # The default is None rather than a shared mutable list; pass a real
    # (possibly empty) sequence on to ProcessFileData either way.
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions or [])
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found;'
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
4248
4249
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  When a message is given it is used as the exit value, prefixed with
  'FATAL ERROR: '; otherwise the program exits with status 1.

  Args:
    message: The optional error message.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
4261
4262
def PrintCategories():
  """Lists every entry of _ERROR_CATEGORIES on stderr, then exits with 0.

  These are the category names that can be used with the --filter flag.
  """
  # One category per line, emitted with a single write call.
  listing = ''.join([' %s\n' % category for category in _ERROR_CATEGORIES])
  sys.stderr.write(listing)
  sys.exit(0)
4270
4271
def ParseArguments(args):
  """Parses the command line arguments.

  As side effects this sets the output format, verbosity level, filters and
  counting style, and, when --root is given, the module-level _root.
  Invalid input is reported through PrintUsage.

  Args:
    args: The command line arguments.

  Returns:
    The list of filenames to lint.
  """
  global _root

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a non-numeric level like any other bad option value instead
      # of letting int() abort the program with a ValueError traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty --filter= value means "show me the available categories".
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
4326
4327
def main():
  """Lints each file named on the command line, then exits.

  The value passed to sys.exit is whether _cpplint_state.error_count is
  greater than zero.
  """
  targets = ParseArguments(sys.argv[1:])

  # Wrap stderr so that output containing non-ASCII characters is written
  # with replacement characters instead of killing the program.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for target in targets:
    ProcessFile(target, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)
4344
4345
# Run the linter only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()