Blame - cpplint/cpplint.py - platform/external/google-styleguide

blob: 24fd40c1a48d91ed98d26a592a3e5f73939cb7a7 [file] [log] [blame]

erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1	#!/usr/bin/python2.4
				2	#
erg@google.com	8f91ab2	2011-09-06 21:04:45 +0000	[diff] [blame]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	8	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	18	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	30
				31	# Here are some issues that I've had people identify in my code during reviews,
				32	# that I think are possible to flag automatically in a lint tool. If these were
				33	# caught by lint, it would save time both for myself and that of my reviewers.
				34	# Most likely, some of these are beyond the scope of the current lint framework,
				35	# but I think it is valuable to retain these wish-list items even if they cannot
				36	# be immediately implemented.
				37	#
				38	# Suggestions
				39	# -----------
				40	# - Check for no 'explicit' for multi-arg ctor
				41	# - Check for boolean assign RHS in parens
				42	# - Check for ctor initializer-list colon position and spacing
				43	# - Check that if there's a ctor, there should be a dtor
				44	# - Check accessors that return non-pointer member variables are
				45	# declared const
				46	# - Check accessors that return non-const pointer member vars are
				47	# not declared const
				48	# - Check for using public includes for testing
				49	# - Check for spaces between brackets in one-line inline method
				50	# - Check for no assert()
				51	# - Check for spaces surrounding operators
				52	# - Check for 0 in pointer context (should be NULL)
				53	# - Check for 0 in char context (should be '\0')
				54	# - Check for camel-case method name conventions for methods
				55	# that are not simple inline getters and setters
				56	# - Check that base classes have virtual destructors
				57	# put " // namespace" after } that closes a namespace, with
				58	# namespace's name after 'namespace' if it is named.
				59	# - Do not indent namespace contents
				60	# - Avoid inlining non-trivial constructors in header files
				61	# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
				62	# - Check for old-school (void) cast for call-sites of functions
				63	# ignored return value
				64	# - Check gUnit usage of anonymous namespace
				65	# - Check for class declaration order (typedefs, consts, enums,
				66	# ctor(s?), dtor, friend declarations, methods, member vars)
				67	#
				68
				69	"""Does google-lint on c++ files.
				70
				71	The goal of this script is to identify places in the code that may
				72	be in non-compliance with google style. It does not attempt to fix
				73	up these problems -- the point is to educate. It does also not
				74	attempt to find all problems, or to ensure that everything it does
				75	find is legitimately a problem.
				76
				77	In particular, we can get very confused by /* and // inside strings!
				78	We do a small hack, which is to ignore //'s with "'s after them on the
				79	same line, but it is far from perfect (in either direction).
				80	"""
				81
				82	import codecs
				83	import getopt
				84	import math # for log
				85	import os
				86	import re
				87	import sre_compile
				88	import string
				89	import sys
				90	import unicodedata
				91
				92
				93	_USAGE = """
				94	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	95	[--counting=total\|toplevel\|detailed]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	96	<file> [file] ...
				97
				98	The style guidelines this tries to follow are those in
				99	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				100
				101	Every problem is given a confidence score from 1-5, with 5 meaning we are
				102	certain of the problem, and 1 meaning it could be a legitimate construct.
				103	This will miss some errors, and is not a substitute for a code review.
				104
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	105	To suppress false-positive errors of a certain category, add a
				106	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				107	suppresses errors of all categories on that line.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	108
				109	The files passed in will be linted; at least one file must be provided.
				110	Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
				111
				112	Flags:
				113
				114	output=vs7
				115	By default, the output is formatted to ease emacs parsing. Visual Studio
				116	compatible output (vs7) may also be used. Other formats are unsupported.
				117
				118	verbose=#
				119	Specify a number 0-5 to restrict errors to certain verbosity levels.
				120
				121	filter=-x,+y,...
				122	Specify a comma-separated list of category-filters to apply: only
				123	error messages whose category names pass the filters will be printed.
				124	(Category names are printed with the message and look like
				125	"[whitespace/indent]".) Filters are evaluated left to right.
				126	"-FOO" and "FOO" means "do not print categories that start with FOO".
				127	"+FOO" means "do print categories that start with FOO".
				128
				129	Examples: --filter=-whitespace,+whitespace/braces
				130	--filter=whitespace,runtime/printf,+runtime/printf_format
				131	--filter=-,+build/include_what_you_use
				132
				133	To see a list of all the categories used in cpplint, pass no arg:
				134	--filter=
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	135
				136	counting=total\|toplevel\|detailed
				137	The total number of errors found is always printed. If
				138	'toplevel' is provided, then the count of errors in each of
				139	the top-level categories like 'build' and 'whitespace' will
				140	also be printed. If 'detailed' is provided, then a count
				141	is provided for each category like 'build/class'.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	142	"""
				143
				144	# We categorize each error message we print. Here are the categories.
				145	# We want an explicit list so we can list them all in cpplint --filter=.
				146	# If you add a new error message with a new category, add it to the list
				147	# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	148	# \ used for clearer layout -- pylint: disable-msg=C6013
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	149	_ERROR_CATEGORIES = [
				150	'build/class',
				151	'build/deprecated',
				152	'build/endif_comment',
				153	'build/forward_decl',
				154	'build/header_guard',
				155	'build/include',
				156	'build/include_alpha',
				157	'build/include_order',
				158	'build/include_what_you_use',
				159	'build/namespaces',
				160	'build/printf_format',
				161	'build/storage_class',
				162	'legal/copyright',
				163	'readability/braces',
				164	'readability/casting',
				165	'readability/check',
				166	'readability/constructors',
				167	'readability/fn_size',
				168	'readability/function',
				169	'readability/multiline_comment',
				170	'readability/multiline_string',
				171	'readability/nolint',
				172	'readability/streams',
				173	'readability/todo',
				174	'readability/utf8',
				175	'runtime/arrays',
				176	'runtime/casting',
				177	'runtime/explicit',
				178	'runtime/int',
				179	'runtime/init',
				180	'runtime/invalid_increment',
				181	'runtime/member_string_references',
				182	'runtime/memset',
				183	'runtime/operator',
				184	'runtime/printf',
				185	'runtime/printf_format',
				186	'runtime/references',
				187	'runtime/rtti',
				188	'runtime/sizeof',
				189	'runtime/string',
				190	'runtime/threadsafe_fn',
				191	'runtime/virtual',
				192	'whitespace/blank_line',
				193	'whitespace/braces',
				194	'whitespace/comma',
				195	'whitespace/comments',
				196	'whitespace/end_of_line',
				197	'whitespace/ending_newline',
				198	'whitespace/indent',
				199	'whitespace/labels',
				200	'whitespace/line_length',
				201	'whitespace/newline',
				202	'whitespace/operators',
				203	'whitespace/parens',
				204	'whitespace/semicolon',
				205	'whitespace/tab',
				206	'whitespace/todo'
				207	]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	208
# The default state of the category filter.  This is overridden by the
# --filter= flag.  By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = [
    '-build/include_alpha',
]
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	214
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	215	# We used to check for high-bit characters, but after much discussion we
				216	# decided those were OK, as long as they were in UTF-8 and didn't represent
				217	# hard-coded international strings, which belong in a separate i18n file.
				218
				219	# Headers that we consider STL headers.
				220	_STL_HEADERS = frozenset([
				221	'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
				222	'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	223	'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
				224	'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	225	'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
				226	'utility', 'vector', 'vector.h',
				227	])
				228
				229
				230	# Non-STL C++ system headers.
				231	_CPP_HEADERS = frozenset([
				232	'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
				233	'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
				234	'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
				235	'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
				236	'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
				237	'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	238	'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
				239	'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
				240	'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
				241	'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
				242	'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	243	'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
				244	'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
				245	])
				246
				247
				248	# Assertion macros. These are defined in base/logging.h and
				249	# testing/base/gunit.h. Note that the _M versions need to come first
				250	# for substring matching to work.
				251	_CHECK_MACROS = [
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	252	'DCHECK', 'CHECK',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	253	'EXPECT_TRUE_M', 'EXPECT_TRUE',
				254	'ASSERT_TRUE_M', 'ASSERT_TRUE',
				255	'EXPECT_FALSE_M', 'EXPECT_FALSE',
				256	'ASSERT_FALSE_M', 'ASSERT_FALSE',
				257	]
				258
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	259	# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	260	_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
				261
				262	for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
				263	('>=', 'GE'), ('>', 'GT'),
				264	('<=', 'LE'), ('<', 'LT')]:
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	265	_CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	266	_CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
				267	_CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
				268	_CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
				269	_CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
				270	_CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
				271
				272	for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
				273	('>=', 'LT'), ('>', 'LE'),
				274	('<=', 'GT'), ('<', 'GE')]:
				275	_CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
				276	_CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
				277	_CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
				278	_CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
				279
				280
				281	# These constants define types of headers for use with
				282	# _IncludeState.CheckNextIncludeOrder().
				283	_C_SYS_HEADER = 1
				284	_CPP_SYS_HEADER = 2
				285	_LIKELY_MY_HEADER = 3
				286	_POSSIBLE_MY_HEADER = 4
				287	_OTHER_HEADER = 5
				288
				289
				290	_regexp_compile_cache = {}
				291
# Finds occurrences of NOLINT or NOLINT(...).  The optional group captures
# the parenthesized category *including* its parentheses, e.g. '(build/class)'.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None means "all categories".
_error_suppressions = {}

def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Updates the global list of error-suppressions.

  Parses any NOLINT comments on the current line, updating the global
  error_suppressions store.  Reports an error if the NOLINT comment
  was malformed.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  m = _RE_SUPPRESSION.search(raw_line)
  if m:
    category = m.group(1)
    if category in (None, '(*)'):  # => "suppress all"
      _error_suppressions.setdefault(None, set()).add(linenum)
    else:
      if category.startswith('(') and category.endswith(')'):
        # Strip the surrounding parentheses captured by the regexp.
        category = category[1:-1]
        if category in _ERROR_CATEGORIES:
          _error_suppressions.setdefault(category, set()).add(linenum)
        else:
          error(filename, linenum, 'readability/nolint', 5,
                'Unknown NOLINT error category: %s' % category)
				326
				327
				328	def ResetNolintSuppressions():
				329	"Resets the set of NOLINT suppressions to empty."
				330	_error_suppressions.clear()
				331
				332
				333	def IsErrorSuppressedByNolint(category, linenum):
				334	"""Returns true if the specified error category is suppressed on this line.
				335
				336	Consults the global error_suppressions map populated by
				337	ParseNolintSuppressions/ResetNolintSuppressions.
				338
				339	Args:
				340	category: str, the category of the error.
				341	linenum: int, the current line number.
				342	Returns:
				343	bool, True iff the error should be suppressed due to a NOLINT comment.
				344	"""
				345	return (linenum in _error_suppressions.get(category, set()) or
				346	linenum in _error_suppressions.get(None, set()))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	347
				348	def Match(pattern, s):
				349	"""Matches the string with the pattern, caching the compiled regexp."""
				350	# The regexp compilation caching is inlined in both Match and Search for
				351	# performance reasons; factoring it out into a separate function turns out
				352	# to be noticeably expensive.
				353	if not pattern in _regexp_compile_cache:
				354	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				355	return _regexp_compile_cache[pattern].match(s)
				356
				357
				358	def Search(pattern, s):
				359	"""Searches the string for the pattern, caching the compiled regexp."""
				360	if not pattern in _regexp_compile_cache:
				361	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				362	return _regexp_compile_cache[pattern].search(s)
				363
				364
				365	class _IncludeState(dict):
				366	"""Tracks line numbers for includes, and the order in which includes appear.
				367
				368	As a dict, an _IncludeState object serves as a mapping between include
				369	filename and line number on which that file was included.
				370
				371	Call CheckNextIncludeOrder() once for each header in the file, passing
				372	in the type constants defined above. Calls in an illegal order will
				373	raise an _IncludeError with an appropriate error message.
				374
				375	"""
				376	# self._section will move monotonically through this set. If it ever
				377	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				378	_INITIAL_SECTION = 0
				379	_MY_H_SECTION = 1
				380	_C_SECTION = 2
				381	_CPP_SECTION = 3
				382	_OTHER_H_SECTION = 4
				383
				384	_TYPE_NAMES = {
				385	_C_SYS_HEADER: 'C system header',
				386	_CPP_SYS_HEADER: 'C++ system header',
				387	_LIKELY_MY_HEADER: 'header this file implements',
				388	_POSSIBLE_MY_HEADER: 'header this file may implement',
				389	_OTHER_HEADER: 'other header',
				390	}
				391	_SECTION_NAMES = {
				392	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				393	_MY_H_SECTION: 'a header this file implements',
				394	_C_SECTION: 'C system header',
				395	_CPP_SECTION: 'C++ system header',
				396	_OTHER_H_SECTION: 'other header',
				397	}
				398
				399	def __init__(self):
				400	dict.__init__(self)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	401	# The name of the current section.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	402	self._section = self._INITIAL_SECTION
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	403	# The path of last found header.
				404	self._last_header = ''
				405
				406	def CanonicalizeAlphabeticalOrder(self, header_path):
				407	"""Returns a path canonicalized for alphabetical comparisson.
				408
				409	- replaces "-" with "_" so they both cmp the same.
				410	- removes '-inl' since we don't require them to be after the main header.
				411	- lowercase everything, just in case.
				412
				413	Args:
				414	header_path: Path to be canonicalized.
				415
				416	Returns:
				417	Canonicalized path.
				418	"""
				419	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				420
				421	def IsInAlphabeticalOrder(self, header_path):
				422	"""Check if a header is in alphabetical order with the previous header.
				423
				424	Args:
				425	header_path: Header to be checked.
				426
				427	Returns:
				428	Returns true if the header is in alphabetical order.
				429	"""
				430	canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
				431	if self._last_header > canonical_header:
				432	return False
				433	self._last_header = canonical_header
				434	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	435
				436	def CheckNextIncludeOrder(self, header_type):
				437	"""Returns a non-empty error message if the next header is out of order.
				438
				439	This function also updates the internal state to be ready to check
				440	the next include.
				441
				442	Args:
				443	header_type: One of the _XXX_HEADER constants defined above.
				444
				445	Returns:
				446	The empty string if the header is in the right order, or an
				447	error message describing what's wrong.
				448
				449	"""
				450	error_message = ('Found %s after %s' %
				451	(self._TYPE_NAMES[header_type],
				452	self._SECTION_NAMES[self._section]))
				453
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	454	last_section = self._section
				455
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	456	if header_type == _C_SYS_HEADER:
				457	if self._section <= self._C_SECTION:
				458	self._section = self._C_SECTION
				459	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	460	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	461	return error_message
				462	elif header_type == _CPP_SYS_HEADER:
				463	if self._section <= self._CPP_SECTION:
				464	self._section = self._CPP_SECTION
				465	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	466	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	467	return error_message
				468	elif header_type == _LIKELY_MY_HEADER:
				469	if self._section <= self._MY_H_SECTION:
				470	self._section = self._MY_H_SECTION
				471	else:
				472	self._section = self._OTHER_H_SECTION
				473	elif header_type == _POSSIBLE_MY_HEADER:
				474	if self._section <= self._MY_H_SECTION:
				475	self._section = self._MY_H_SECTION
				476	else:
				477	# This will always be the fallback because we're not sure
				478	# enough that the header is associated with this file.
				479	self._section = self._OTHER_H_SECTION
				480	else:
				481	assert header_type == _OTHER_HEADER
				482	self._section = self._OTHER_H_SECTION
				483
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	484	if last_section != self._section:
				485	self._last_header = ''
				486
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	487	return ''
				488
				489
				490	class _CppLintState(object):
				491	"""Maintains module-wide state.."""
				492
				493	def __init__(self):
				494	self.verbose_level = 1 # global setting.
				495	self.error_count = 0 # global count of reported errors
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	496	# filters to apply when emitting error messages
				497	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	498	self.counting = 'total' # In what way are we counting errors?
				499	self.errors_by_category = {} # string to int dict storing error counts
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	500
				501	# output format:
				502	# "emacs" - format that emacs can parse (default)
				503	# "vs7" - format that Microsoft Visual Studio 7 can parse
				504	self.output_format = 'emacs'
				505
				506	def SetOutputFormat(self, output_format):
				507	"""Sets the output format for errors."""
				508	self.output_format = output_format
				509
				510	def SetVerboseLevel(self, level):
				511	"""Sets the module's verbosity, and returns the previous setting."""
				512	last_verbose_level = self.verbose_level
				513	self.verbose_level = level
				514	return last_verbose_level
				515
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	516	def SetCountingStyle(self, counting_style):
				517	"""Sets the module's counting options."""
				518	self.counting = counting_style
				519
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	520	def SetFilters(self, filters):
				521	"""Sets the error-message filters.
				522
				523	These filters are applied when deciding whether to emit a given
				524	error message.
				525
				526	Args:
				527	filters: A string of comma-separated filters (eg "+whitespace/indent").
				528	Each filter should start with + or -; else we die.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	529
				530	Raises:
				531	ValueError: The comma-separated filters did not all start with '+' or '-'.
				532	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	533	"""
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	534	# Default filters always have less priority than the flag ones.
				535	self.filters = _DEFAULT_FILTERS[:]
				536	for filt in filters.split(','):
				537	clean_filt = filt.strip()
				538	if clean_filt:
				539	self.filters.append(clean_filt)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	540	for filt in self.filters:
				541	if not (filt.startswith('+') or filt.startswith('-')):
				542	raise ValueError('Every filter in --filters must start with + or -'
				543	' (%s does not)' % filt)
				544
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	545	def ResetErrorCounts(self):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	546	"""Sets the module's error statistic back to zero."""
				547	self.error_count = 0
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	548	self.errors_by_category = {}
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	549
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	550	def IncrementErrorCount(self, category):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	551	"""Bumps the module's error statistic."""
				552	self.error_count += 1
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	553	if self.counting in ('toplevel', 'detailed'):
				554	if self.counting != 'detailed':
				555	category = category.split('/')[0]
				556	if category not in self.errors_by_category:
				557	self.errors_by_category[category] = 0
				558	self.errors_by_category[category] += 1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	559
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	560	def PrintErrorCounts(self):
				561	"""Print a summary of errors by category, and the total."""
				562	for category, count in self.errors_by_category.iteritems():
				563	sys.stderr.write('Category \'%s\' errors found: %d\n' %
				564	(category, count))
				565	sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	566
				567	_cpplint_state = _CppLintState()
				568
				569
				570	def _OutputFormat():
				571	"""Gets the module's output format."""
				572	return _cpplint_state.output_format
				573
				574
				575	def _SetOutputFormat(output_format):
				576	"""Sets the module's output format."""
				577	_cpplint_state.SetOutputFormat(output_format)
				578
				579
				580	def _VerboseLevel():
				581	"""Returns the module's verbosity setting."""
				582	return _cpplint_state.verbose_level
				583
				584
				585	def _SetVerboseLevel(level):
				586	"""Sets the module's verbosity, and returns the previous setting."""
				587	return _cpplint_state.SetVerboseLevel(level)
				588
				589
def _SetCountingStyle(level):
  """Changes the counting style of the shared lint state.

  Args:
    level: The counting style; forwarded unchanged to
      _CppLintState.SetCountingStyle.
  """
  state = _cpplint_state
  state.SetCountingStyle(level)
				593
				594
def _Filters():
  """Returns the list of output filters held by the shared lint state."""
  active_filters = _cpplint_state.filters
  return active_filters
				598
				599
				600	def _SetFilters(filters):
				601	"""Sets the module's error-message filters.
				602
				603	These filters are applied when deciding whether to emit a given
				604	error message.
				605
				606	Args:
				607	filters: A string of comma-separated filters (eg "whitespace/indent").
				608	Each filter should start with + or -; else we die.
				609	"""
				610	_cpplint_state.SetFilters(filters)
				611
				612
				613	class _FunctionState(object):
				614	"""Tracks current function name and the number of lines in its body."""
				615
				616	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				617	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				618
				619	def __init__(self):
				620	self.in_a_function = False
				621	self.lines_in_function = 0
				622	self.current_function = ''
				623
				624	def Begin(self, function_name):
				625	"""Start analyzing function body.
				626
				627	Args:
				628	function_name: The name of the function being tracked.
				629	"""
				630	self.in_a_function = True
				631	self.lines_in_function = 0
				632	self.current_function = function_name
				633
				634	def Count(self):
				635	"""Count line in current function body."""
				636	if self.in_a_function:
				637	self.lines_in_function += 1
				638
				639	def Check(self, error, filename, linenum):
				640	"""Report if too many lines in function body.
				641
				642	Args:
				643	error: The function to call with any errors found.
				644	filename: The name of the current file.
				645	linenum: The number of the line to check.
				646	"""
				647	if Match(r'T(EST\|est)', self.current_function):
				648	base_trigger = self._TEST_TRIGGER
				649	else:
				650	base_trigger = self._NORMAL_TRIGGER
				651	trigger = base_trigger * 2**_VerboseLevel()
				652
				653	if self.lines_in_function > trigger:
				654	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				655	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				656	if error_level > 5:
				657	error_level = 5
				658	error(filename, linenum, 'readability/fn_size', error_level,
				659	'Small and focused functions are preferred:'
				660	' %s has %d non-comment lines'
				661	' (error triggered by exceeding %d lines).' % (
				662	self.current_function, self.lines_in_function, trigger))
				663
				664	def End(self):
				665	"""Stop analizing function body."""
				666	self.in_a_function = False
				667
				668
				669	class _IncludeError(Exception):
				670	"""Indicates a problem with the include order in a file."""
				671	pass
				672
				673
				674	class FileInfo:
				675	"""Provides utility functions for filenames.
				676
				677	FileInfo provides easy access to the components of a file's path
				678	relative to the project root.
				679	"""
				680
				681	def __init__(self, filename):
				682	self._filename = filename
				683
				684	def FullName(self):
				685	"""Make Windows paths like Unix."""
				686	return os.path.abspath(self._filename).replace('\\', '/')
				687
				688	def RepositoryName(self):
				689	"""FullName after removing the local path to the repository.
				690
				691	If we have a real absolute path name here we can try to do something smart:
				692	detecting the root of the checkout and truncating /path/to/checkout from
				693	the name so that we get header guards that don't include things like
				694	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				695	people on different computers who have checked the source out to different
				696	locations won't see bogus errors.
				697	"""
				698	fullname = self.FullName()
				699
				700	if os.path.exists(fullname):
				701	project_dir = os.path.dirname(fullname)
				702
				703	if os.path.exists(os.path.join(project_dir, ".svn")):
				704	# If there's a .svn file in the current directory, we recursively look
				705	# up the directory tree for the top of the SVN checkout
				706	root_dir = project_dir
				707	one_up_dir = os.path.dirname(root_dir)
				708	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				709	root_dir = os.path.dirname(root_dir)
				710	one_up_dir = os.path.dirname(one_up_dir)
				711
				712	prefix = os.path.commonprefix([root_dir, project_dir])
				713	return fullname[len(prefix) + 1:]
				714
erg@google.com	5e16969	2010-01-28 20:17:01 +0000	[diff] [blame]	715	# Not SVN? Try to find a git or hg top level directory by searching up
				716	# from the current path.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	717	root_dir = os.path.dirname(fullname)
				718	while (root_dir != os.path.dirname(root_dir) and
erg@google.com	5e16969	2010-01-28 20:17:01 +0000	[diff] [blame]	719	not os.path.exists(os.path.join(root_dir, ".git")) and
				720	not os.path.exists(os.path.join(root_dir, ".hg"))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	721	root_dir = os.path.dirname(root_dir)
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	722
				723	if (os.path.exists(os.path.join(root_dir, ".git")) or
				724	os.path.exists(os.path.join(root_dir, ".hg"))):
				725	prefix = os.path.commonprefix([root_dir, project_dir])
				726	return fullname[len(prefix) + 1:]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	727
				728	# Don't know what to do; header guard warnings may be wrong...
				729	return fullname
				730
				731	def Split(self):
				732	"""Splits the file into the directory, basename, and extension.
				733
				734	For 'chrome/browser/browser.cc', Split() would
				735	return ('chrome/browser', 'browser', '.cc')
				736
				737	Returns:
				738	A tuple of (directory, basename, extension).
				739	"""
				740
				741	googlename = self.RepositoryName()
				742	project, rest = os.path.split(googlename)
				743	return (project,) + os.path.splitext(rest)
				744
				745	def BaseName(self):
				746	"""File base name - text after the final slash, before the final period."""
				747	return self.Split()[1]
				748
				749	def Extension(self):
				750	"""File extension - text following the final period."""
				751	return self.Split()[2]
				752
				753	def NoExtension(self):
				754	"""File has no source file extension."""
				755	return '/'.join(self.Split()[0:2])
				756
				757	def IsSource(self):
				758	"""File has a source file extension."""
				759	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				760
				761
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be reported.

  An error is dropped when it is NOLINT-suppressed for this line, when its
  confidence is below the configured verbosity level, or when the last
  matching filter for its category is a '-' filter.

  Args:
    category: The error category string.
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  suppressed = False
  for one_filter in _Filters():
    sign = one_filter[:1]
    if sign == '-' or sign == '+':
      if category.startswith(one_filter[1:]):
        suppressed = (sign == '-')
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
				788
				789
				790	def Error(filename, linenum, category, confidence, message):
				791	"""Logs the fact we've found a lint error.
				792
				793	We log where the error was found, and also our confidence in the error,
				794	that is, how certain we are this is a legitimate style regression, and
				795	not a misidentification or a use that's sometimes justified.
				796
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	797	False positives can be suppressed by the use of
				798	"cpplint(category)" comments on the offending line. These are
				799	parsed into _error_suppressions.
				800
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	801	Args:
				802	filename: The name of the file containing the error.
				803	linenum: The number of the line containing the error.
				804	category: A string used to describe the "category" this bug
				805	falls under: "whitespace", say, or "runtime". Categories
				806	may have a hierarchy separated by slashes: "whitespace/indent".
				807	confidence: A number from 1-5 representing a confidence score for
				808	the error, with 5 meaning that we are certain of the problem,
				809	and 1 meaning that it could be a legitimate construct.
				810	message: The error message.
				811	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	812	if _ShouldPrintError(category, confidence, linenum):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	813	_cpplint_state.IncrementErrorCount(category)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	814	if _cpplint_state.output_format == 'vs7':
				815	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				816	filename, linenum, message, category, confidence))
				817	else:
				818	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				819	filename, linenum, message, category, confidence))
				820
				821
				822	# Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard.
				823	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				824	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				825	# Matches strings. Escape codes should already be removed by ESCAPES.
				826	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				827	# Matches characters. Escape codes should already be removed by ESCAPES.
				828	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				829	# Matches multi-line C++ comments.
				830	# This RE is a little bit more complicated than one might expect, because we
				831	# have to take care of space removals tools so we can handle comments inside
				832	# statements better.
				833	# The current rule is: We only clear spaces from both sides when we're at the
				834	# end of the line. Otherwise, we try to remove spaces from the right side,
				835	# if this doesn't work we try on left side but only if there's a non-character
				836	# on the right.
				837	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				838	r"""(\s/\.\/\s*$\|
				839	/\.\*/\s+\|
				840	\s+/\.\*/(?=\W)\|
				841	/\.\*/)""", re.VERBOSE)
				842
				843
				844	def IsCppString(line):
				845	"""Does line terminate so, that the next symbol is in string constant.
				846
				847	This function does not consider single-line nor multi-line comments.
				848
				849	Args:
				850	line: is a partial line of code starting from the 0..n.
				851
				852	Returns:
				853	True, if next character appended to 'line' is inside a
				854	string constant.
				855	"""
				856
				857	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				858	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				859
				860
				861	def FindNextMultiLineCommentStart(lines, lineix):
				862	"""Find the beginning marker for a multiline comment."""
				863	while lineix < len(lines):
				864	if lines[lineix].strip().startswith('/*'):
				865	# Only return this marker if the comment goes beyond this line
				866	if lines[lineix].strip().find('*/', 2) < 0:
				867	return lineix
				868	lineix += 1
				869	return len(lines)
				870
				871
				872	def FindNextMultiLineCommentEnd(lines, lineix):
				873	"""We are inside a comment, find the end marker."""
				874	while lineix < len(lines):
				875	if lines[lineix].strip().endswith('*/'):
				876	return lineix
				877	lineix += 1
				878	return len(lines)
				879
				880
				881	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				882	"""Clears a range of lines for multi-line comments."""
				883	# Having // dummy comments makes the lines non-empty, so we will not get
				884	# unnecessary blank line warnings later in the code.
				885	for i in range(begin, end):
				886	lines[i] = '// dummy'
				887
				888
				889	def RemoveMultiLineComments(filename, lines, error):
				890	"""Removes multiline (c-style) comments from lines."""
				891	lineix = 0
				892	while lineix < len(lines):
				893	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				894	if lineix_begin >= len(lines):
				895	return
				896	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				897	if lineix_end >= len(lines):
				898	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				899	'Could not find end of multi-line comment')
				900	return
				901	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				902	lineix = lineix_end + 1
				903
				904
				905	def CleanseComments(line):
				906	"""Removes //-comments and single-line C-style /* */ comments.
				907
				908	Args:
				909	line: A line of C++ source.
				910
				911	Returns:
				912	The line with single-line comments removed.
				913	"""
				914	commentpos = line.find('//')
				915	if commentpos != -1 and not IsCppString(line[:commentpos]):
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	916	line = line[:commentpos].rstrip()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	917	# get rid of /* ... */
				918	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				919
				920
class CleansedLines(object):
  """Holds 3 copies of all lines with different preprocessing applied to them.

  1) elided member contains lines with strings collapsed and comments removed,
  2) lines member contains lines without comments, and
  3) raw_lines member contains all the lines without processing.
  All these three members are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings. Lines matching _RE_PATTERN_INCLUDE are
      returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
				964
				965
				966	def CloseExpression(clean_lines, linenum, pos):
				967	"""If input points to ( or { or [, finds the position that closes it.
				968
				969	If lines[linenum][pos] points to a '(' or '{' or '[', finds the the
				970	linenum/pos that correspond to the closing of the expression.
				971
				972	Args:
				973	clean_lines: A CleansedLines instance containing the file.
				974	linenum: The number of the line to check.
				975	pos: A position on the line.
				976
				977	Returns:
				978	A tuple (line, linenum, pos) pointer past the closing brace, or
				979	(line, len(lines), -1) if we never find a close. Note we ignore
				980	strings and comments when matching; and the line we return is the
				981	'cleansed' line at linenum.
				982	"""
				983
				984	line = clean_lines.elided[linenum]
				985	startchar = line[pos]
				986	if startchar not in '({[':
				987	return (line, clean_lines.NumLines(), -1)
				988	if startchar == '(': endchar = ')'
				989	if startchar == '[': endchar = ']'
				990	if startchar == '{': endchar = '}'
				991
				992	num_open = line.count(startchar) - line.count(endchar)
				993	while linenum < clean_lines.NumLines() and num_open > 0:
				994	linenum += 1
				995	line = clean_lines.elided[linenum]
				996	num_open += line.count(startchar) - line.count(endchar)
				997	# OK, now find the endchar that actually got us back to even
				998	endpos = len(line)
				999	while num_open >= 0:
				1000	endpos = line.rfind(')', 0, endpos)
				1001	num_open -= 1 # chopped off another )
				1002	return (line, linenum, endpos + 1)
				1003
				1004
				1005	def CheckForCopyright(filename, lines, error):
				1006	"""Logs an error if no Copyright message appears at the top of the file."""
				1007
				1008	# We'll say it should occur by line 10. Don't forget there's a
				1009	# dummy line at the front.
				1010	for line in xrange(1, min(len(lines), 11)):
				1011	if re.search(r'Copyright', lines[line], re.I): break
				1012	else: # means no copyright line was found
				1013	error(filename, 0, 'legal/copyright', 5,
				1014	'No copyright message found. '
				1015	'You should have a line: "Copyright [year] <Copyright Owner>"')
				1016
				1017
				1018	def GetHeaderGuardCPPVariable(filename):
				1019	"""Returns the CPP variable that should be used as a header guard.
				1020
				1021	Args:
				1022	filename: The name of a C++ header file.
				1023
				1024	Returns:
				1025	The CPP variable that should be used as a header guard in the
				1026	named file.
				1027
				1028	"""
				1029
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1030	# Restores original filename in case that cpplint is invoked from Emacs's
				1031	# flymake.
				1032	filename = re.sub(r'_flymake\.h$', '.h', filename)
				1033
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1034	fileinfo = FileInfo(filename)
				1035	return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
				1036
				1037
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no matching #ifndef/#define pair is present. Otherwise
  checks that the guard name and the comment on the last #endif line use
  the CPP variable derived from the file's full pathname.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    linesplit = line.split()
    if len(linesplit) >= 2:
      # find the first occurrence of #ifndef and #define, save arg
      if not ifndef and linesplit[0] == '#ifndef':
        # set ifndef to the header guard presented on the #ifndef line.
        ifndef = linesplit[1]
        ifndef_linenum = linenum
      if not define and linesplit[0] == '#define':
        define = linesplit[1]
    # find the last occurrence of #endif, save entire line
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  # The first #ifndef and the first #define must name the same symbol;
  # anything else is reported as a missing guard.
  if not ifndef or not define or ifndef != define:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility.
  if ifndef != cppvar:
    # Confidence 0 for the tolerated double-underscore form, 5 otherwise.
    error_level = 0
    if ifndef != cppvar + '_':
      error_level = 5

    # Parse suppressions on the offending line before reporting against it.
    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  # NOTE(review): Google style puts two spaces before a trailing // comment;
  # confirm that the single space in the '#endif // ...' literals below is
  # intended.
  # endif is still None here when the file has no #endif line at all, in
  # which case the error is reported against line 0.
  if endif != ('#endif // %s' % cppvar):
    error_level = 0
    if endif != ('#endif // %s' % (cppvar + '_')):
      error_level = 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
				1099
				1100
				1101	def CheckForUnicodeReplacementCharacters(filename, lines, error):
				1102	"""Logs an error for each line containing Unicode replacement characters.
				1103
				1104	These indicate that either the file contained invalid UTF-8 (likely)
				1105	or Unicode replacement characters (which it shouldn't). Note that
				1106	it's possible for this to throw off line numbering if the invalid
				1107	UTF-8 occurred adjacent to a newline.
				1108
				1109	Args:
				1110	filename: The name of the current file.
				1111	lines: An array of strings, each representing a line of the file.
				1112	error: The function to call with any errors found.
				1113	"""
				1114	for linenum, line in enumerate(lines):
				1115	if u'\ufffd' in line:
				1116	error(filename, linenum, 'readability/utf8', 5,
				1117	'Line contains invalid UTF-8 (or Unicode replacement character).')
				1118
				1119
				1120	def CheckForNewlineAtEOF(filename, lines, error):
				1121	"""Logs an error if there is no newline char at the end of the file.
				1122
				1123	Args:
				1124	filename: The name of the current file.
				1125	lines: An array of strings, each representing a line of the file.
				1126	error: The function to call with any errors found.
				1127	"""
				1128
				1129	# The array lines() was created by adding two newlines to the
				1130	# original file (go figure), then splitting on \n.
				1131	# To verify that the file ends in \n, we just have to make sure the
				1132	# last-but-two element of lines() exists and is empty.
				1133	if len(lines) < 3 or lines[-2]:
				1134	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1135	'Could not find a newline character at the end of the file.')
				1136
				1137
				1138	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1139	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1140
				1141	/* ... */ comments are legit inside macros, for one line.
				1142	Otherwise, we prefer // comments, so it's ok to warn about the
				1143	other. Likewise, it's ok for strings to extend across multiple
				1144	lines, as long as a line continuation character (backslash)
				1145	terminates each line. Although not currently prohibited by the C++
				1146	style guide, it's ugly and unnecessary. We don't do well with either
				1147	in this lint program, so we warn about both.
				1148
				1149	Args:
				1150	filename: The name of the current file.
				1151	clean_lines: A CleansedLines instance containing the file.
				1152	linenum: The number of the line to check.
				1153	error: The function to call with any errors found.
				1154	"""
				1155	line = clean_lines.elided[linenum]
				1156
				1157	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1158	# second (escaped) slash may trigger later \" detection erroneously.
				1159	line = line.replace('\\\\', '')
				1160
				1161	if line.count('/') > line.count('/'):
				1162	error(filename, linenum, 'readability/multiline_comment', 5,
				1163	'Complex multi-line /.../-style comment found. '
				1164	'Lint may give bogus warnings. '
				1165	'Consider replacing these with //-style comments, '
				1166	'with #if 0...#endif, '
				1167	'or with more clearly structured multi-line comments.')
				1168
				1169	if (line.count('"') - line.count('\\"')) % 2:
				1170	error(filename, linenum, 'readability/multiline_string', 5,
				1171	'Multi-line string ("...") found. This lint script doesn\'t '
				1172	'do well with such strings, and may give bogus warnings. They\'re '
				1173	'ugly and unnecessary, and you should use concatenation instead".')
				1174
				1175
				1176	threading_list = (
				1177	('asctime(', 'asctime_r('),
				1178	('ctime(', 'ctime_r('),
				1179	('getgrgid(', 'getgrgid_r('),
				1180	('getgrnam(', 'getgrnam_r('),
				1181	('getlogin(', 'getlogin_r('),
				1182	('getpwnam(', 'getpwnam_r('),
				1183	('getpwuid(', 'getpwuid_r('),
				1184	('gmtime(', 'gmtime_r('),
				1185	('localtime(', 'localtime_r('),
				1186	('rand(', 'rand_r('),
				1187	('readdir(', 'readdir_r('),
				1188	('strtok(', 'strtok_r('),
				1189	('ttyname(', 'ttyname_r('),
				1190	)
				1191
				1192
				1193	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1194	"""Checks for calls to thread-unsafe functions.
				1195
				1196	Much code has been originally written without consideration of
				1197	multi-threading. Also, engineers are relying on their old experience;
				1198	they have learned posix before threading extensions were added. These
				1199	tests guide the engineers to use thread-safe functions (when using
				1200	posix directly).
				1201
				1202	Args:
				1203	filename: The name of the current file.
				1204	clean_lines: A CleansedLines instance containing the file.
				1205	linenum: The number of the line to check.
				1206	error: The function to call with any errors found.
				1207	"""
				1208	line = clean_lines.elided[linenum]
				1209	for single_thread_function, multithread_safe_function in threading_list:
				1210	ix = line.find(single_thread_function)
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1211	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1212	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1213	line[ix - 1] not in ('_', '.', '>'))):
				1214	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1215	'Consider using ' + multithread_safe_function +
				1216	'...) instead of ' + single_thread_function +
				1217	'...) for improved thread safety.')
				1218
				1219
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1220	# Matches invalid increment: *count++, which moves pointer instead of
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1221	# incrementing a value.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1222	_RE_PATTERN_INVALID_INCREMENT = re.compile(
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1223	r'^\s\\w+(\+\+\|--);')
				1224
				1225
				1226	def CheckInvalidIncrement(filename, clean_lines, linenum, error):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1227	"""Checks for invalid increment *count++.
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1228
				1229	For example following function:
				1230	void increment_counter(int* count) {
				1231	*count++;
				1232	}
				1233	is invalid, because it effectively does count++, moving pointer, and should
				1234	be replaced with ++count, (count)++ or *count += 1.
				1235
				1236	Args:
				1237	filename: The name of the current file.
				1238	clean_lines: A CleansedLines instance containing the file.
				1239	linenum: The number of the line to check.
				1240	error: The function to call with any errors found.
				1241	"""
				1242	line = clean_lines.elided[linenum]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1243	if _RE_PATTERN_INVALID_INCREMENT.match(line):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1244	error(filename, linenum, 'runtime/invalid_increment', 5,
				1245	'Changing pointer instead of value (or unused value of operator*).')
				1246
				1247
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1248	class _ClassInfo(object):
				1249	"""Stores information about a class."""
				1250
				1251	def __init__(self, name, linenum):
				1252	self.name = name
				1253	self.linenum = linenum
				1254	self.seen_open_brace = False
				1255	self.is_derived = False
				1256	self.virtual_method_linenumber = None
				1257	self.has_virtual_destructor = False
				1258	self.brace_depth = 0
				1259
				1260
				1261	class _ClassState(object):
				1262	"""Holds the current state of the parse relating to class declarations.
				1263
				1264	It maintains a stack of _ClassInfos representing the parser's guess
				1265	as to the current nesting of class declarations. The innermost class
				1266	is at the top (back) of the stack. Typically, the stack will either
				1267	be empty or have exactly one entry.
				1268	"""
				1269
				1270	def __init__(self):
				1271	self.classinfo_stack = []
				1272
				1273	def CheckFinished(self, filename, error):
				1274	"""Checks that all classes have been completely parsed.
				1275
				1276	Call this when all lines in a file have been processed.
				1277	Args:
				1278	filename: The name of the current file.
				1279	error: The function to call with any errors found.
				1280	"""
				1281	if self.classinfo_stack:
				1282	# Note: This test can result in false positives if #ifdef constructs
				1283	# get in the way of brace matching. See the testBuildClass test in
				1284	# cpplint_unittest.py for an example of this.
				1285	error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
				1286	'Failed to find complete declaration of class %s' %
				1287	self.classinfo_stack[0].name)
				1288
				1289
				1290	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
				1291	class_state, error):
				1292	"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
				1293
				1294	Complain about several constructs which gcc-2 accepts, but which are
				1295	not standard C++. Warning about these in lint is one way to ease the
				1296	transition to new compilers.
				1297	- put storage class first (e.g. "static const" instead of "const static").
				1298	- "%lld" instead of %qd" in printf-type functions.
				1299	- "%1$d" is non-standard in printf-type functions.
				1300	- "\%" is an undefined character escape sequence.
				1301	- text after #endif is not allowed.
				1302	- invalid inner-style forward declaration.
				1303	- >? and <? operators, and their >?= and <?= cousins.
				1304	- classes with virtual methods need virtual destructors (compiler warning
				1305	available, but not turned on yet.)
				1306
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1307	Additionally, check for constructor/destructor style violations and reference
				1308	members, as it is very convenient to do so while checking for
				1309	gcc-2 compliance.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1310
				1311	Args:
				1312	filename: The name of the current file.
				1313	clean_lines: A CleansedLines instance containing the file.
				1314	linenum: The number of the line to check.
				1315	class_state: A _ClassState instance which maintains information about
				1316	the current stack of nested class declarations being parsed.
				1317	error: A callable to which errors are reported, which takes 4 arguments:
				1318	filename, line number, error level, and message
				1319	"""
				1320
				1321	# Remove comments from the line, but leave in strings for now.
				1322	line = clean_lines.lines[linenum]
				1323
				1324	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				1325	error(filename, linenum, 'runtime/printf_format', 3,
				1326	'%q in format strings is deprecated. Use %ll instead.')
				1327
				1328	if Search(r'printf\s\(.".*%\d+\$', line):
				1329	error(filename, linenum, 'runtime/printf_format', 2,
				1330	'%N$ formats are unconventional. Try rewriting to avoid them.')
				1331
				1332	# Remove escaped backslashes before looking for undefined escapes.
				1333	line = line.replace('\\\\', '')
				1334
				1335	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				1336	error(filename, linenum, 'build/printf_format', 3,
				1337	'%, [, (, and { are undefined character escapes. Unescape them.')
				1338
				1339	# For the rest, work with both comments and strings removed.
				1340	line = clean_lines.elided[linenum]
				1341
				1342	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				1343	r'\|float\|double\|signed\|unsigned'
				1344	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
				1345	r'\s+(auto\|register\|static\|extern\|typedef)\b',
				1346	line):
				1347	error(filename, linenum, 'build/storage_class', 5,
				1348	'Storage class (static, extern, typedef, etc) should be first.')
				1349
				1350	if Match(r'\s#\sendif\s*[^/\s]+', line):
				1351	error(filename, linenum, 'build/endif_comment', 5,
				1352	'Uncommented text after #endif is non-standard. Use a comment.')
				1353
				1354	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				1355	error(filename, linenum, 'build/forward_decl', 5,
				1356	'Inner-style forward declarations are invalid. Remove this line.')
				1357
				1358	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				1359	line):
				1360	error(filename, linenum, 'build/deprecated', 3,
				1361	'>? and <? (max and min) operators are non-standard and deprecated.')
				1362
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1363	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				1364	# TODO(unknown): Could it be expanded safely to arbitrary references,
				1365	# without triggering too many false positives? The first
				1366	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				1367	# the restriction.
				1368	# Here's the original regexp, for the reference:
				1369	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				1370	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				1371	error(filename, linenum, 'runtime/member_string_references', 2,
				1372	'const string& members are dangerous. It is much better to use '
				1373	'alternatives, such as pointers or simple constants.')
				1374
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1375	# Track class entry and exit, and attempt to find cases within the
				1376	# class declaration that don't meet the C++ style
				1377	# guidelines. Tracking is very dependent on the code matching Google
				1378	# style guidelines, but it seems to perform well enough in testing
				1379	# to be a worthwhile addition to the checks.
				1380	classinfo_stack = class_state.classinfo_stack
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	1381	# Look for a class declaration. The regexp accounts for decorated classes
				1382	# such as in:
				1383	# class LOCKABLE API Object {
				1384	# };
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1385	class_decl_match = Match(
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	1386	r'\s(template\s<[\w\s<>,:]>\s)?(class\|struct)\s+([A-Z_]+\s+)(\w+(::\w+))', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1387	if class_decl_match:
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	1388	classinfo_stack.append(_ClassInfo(class_decl_match.group(4), linenum))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1389
				1390	# Everything else in this function uses the top of the stack if it's
				1391	# not empty.
				1392	if not classinfo_stack:
				1393	return
				1394
				1395	classinfo = classinfo_stack[-1]
				1396
				1397	# If the opening brace hasn't been seen look for it and also
				1398	# parent class declarations.
				1399	if not classinfo.seen_open_brace:
				1400	# If the line has a ';' in it, assume it's a forward declaration or
				1401	# a single-line class declaration, which we won't process.
				1402	if line.find(';') != -1:
				1403	classinfo_stack.pop()
				1404	return
				1405	classinfo.seen_open_brace = (line.find('{') != -1)
				1406	# Look for a bare ':'
				1407	if Search('(^\|[^:]):($\|[^:])', line):
				1408	classinfo.is_derived = True
				1409	if not classinfo.seen_open_brace:
				1410	return # Everything else in this function is for after open brace
				1411
				1412	# The class may have been declared with namespace or classname qualifiers.
				1413	# The constructor and destructor will not have those qualifiers.
				1414	base_classname = classinfo.name.split('::')[-1]
				1415
				1416	# Look for single-argument constructors that aren't marked explicit.
				1417	# Technically a valid construct, but against style.
				1418	args = Match(r'(?<!explicit)\s+%s\s*$([^,()]+)$'
				1419	% re.escape(base_classname),
				1420	line)
				1421	if (args and
				1422	args.group(1) != 'void' and
				1423	not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
				1424	args.group(1).strip())):
				1425	error(filename, linenum, 'runtime/explicit', 5,
				1426	'Single-argument constructors should be marked explicit.')
				1427
				1428	# Look for methods declared virtual.
				1429	if Search(r'\bvirtual\b', line):
				1430	classinfo.virtual_method_linenumber = linenum
				1431	# Only look for a destructor declaration on the same line. It would
				1432	# be extremely unlikely for the destructor declaration to occupy
				1433	# more than one line.
				1434	if Search(r'~%s\s*\(' % base_classname, line):
				1435	classinfo.has_virtual_destructor = True
				1436
				1437	# Look for class end.
				1438	brace_depth = classinfo.brace_depth
				1439	brace_depth = brace_depth + line.count('{') - line.count('}')
				1440	if brace_depth <= 0:
				1441	classinfo = classinfo_stack.pop()
				1442	# Try to detect missing virtual destructor declarations.
				1443	# For now, only warn if a non-derived class with virtual methods lacks
				1444	# a virtual destructor. This is to make it less likely that people will
				1445	# declare derived virtual destructors without declaring the base
				1446	# destructor virtual.
				1447	if ((classinfo.virtual_method_linenumber is not None) and
				1448	(not classinfo.has_virtual_destructor) and
				1449	(not classinfo.is_derived)): # Only warn for base classes
				1450	error(filename, classinfo.linenum, 'runtime/virtual', 4,
				1451	'The class %s probably needs a virtual destructor due to '
				1452	'having virtual method(s), one declared at line %d.'
				1453	% (classinfo.name, classinfo.virtual_method_linenumber))
				1454	else:
				1455	classinfo.brace_depth = brace_depth
				1456
				1457
				1458	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				1459	"""Checks for the correctness of various spacing around function calls.
				1460
				1461	Args:
				1462	filename: The name of the current file.
				1463	line: The text of the line to check.
				1464	linenum: The number of the line to check.
				1465	error: The function to call with any errors found.
				1466	"""
				1467
				1468	# Since function calls often occur inside if/for/while/switch
				1469	# expressions - which have their own, more liberal conventions - we
				1470	# first see if we should be looking inside such an expression for a
				1471	# function call, to which we can apply more strict standards.
				1472	fncall = line # if there's no control flow construct, look at whole line
				1473	for pattern in (r'\bif\s$(.)$\s*{',
				1474	r'\bfor\s$(.)$\s*{',
				1475	r'\bwhile\s$(.)$\s*[{;]',
				1476	r'\bswitch\s$(.)$\s*{'):
				1477	match = Search(pattern, line)
				1478	if match:
				1479	fncall = match.group(1) # look inside the parens for function calls
				1480	break
				1481
				1482	# Except in if/for/while/switch, there should never be space
				1483	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				1484	# for nested parens ( (a+b) + c ). Likewise, there should never be
				1485	# a space before a ( when it's a function argument. I assume it's a
				1486	# function argument when the char before the whitespace is legal in
				1487	# a function name (alnum + _) and we're not starting a macro. Also ignore
				1488	# pointers and references to arrays and functions coz they're too tricky:
				1489	# we use a very simple way to recognize these:
				1490	# " (something)(maybe-something)" or
				1491	# " (something)(maybe-something," or
				1492	# " (something)[something]"
				1493	# Note that we assume the contents of [] to be short enough that
				1494	# they'll never need to wrap.
				1495	if ( # Ignore control structures.
				1496	not Search(r'\b(if\|for\|while\|switch\|return\|delete)\b', fncall) and
				1497	# Ignore pointers/references to functions.
				1498	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				1499	# Ignore pointers/references to arrays.
				1500	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1501	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1502	error(filename, linenum, 'whitespace/parens', 4,
				1503	'Extra space after ( in function call')
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1504	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1505	error(filename, linenum, 'whitespace/parens', 2,
				1506	'Extra space after (')
				1507	if (Search(r'\w\s+\(', fncall) and
				1508	not Search(r'#\s*define\|typedef', fncall)):
				1509	error(filename, linenum, 'whitespace/parens', 4,
				1510	'Extra space before ( in function call')
				1511	# If the ) is followed only by a newline or a { + newline, assume it's
				1512	# part of a control statement (if/while/etc), and don't complain
				1513	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
				1514	error(filename, linenum, 'whitespace/parens', 2,
				1515	'Extra space before )')
				1516
				1517
				1518	def IsBlankLine(line):
				1519	"""Returns true if the given line is blank.
				1520
				1521	We consider a line to be blank if the line is empty or consists of
				1522	only white spaces.
				1523
				1524	Args:
				1525	line: A line of a string.
				1526
				1527	Returns:
				1528	True, if the given line is blank.
				1529	"""
				1530	return not line or line.isspace()
				1531
				1532
				1533	def CheckForFunctionLengths(filename, clean_lines, linenum,
				1534	function_state, error):
				1535	"""Reports for long function bodies.
				1536
				1537	For an overview why this is done, see:
				1538	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				1539
				1540	Uses a simplistic algorithm assuming other style guidelines
				1541	(especially spacing) are followed.
				1542	Only checks unindented functions, so class members are unchecked.
				1543	Trivial bodies are unchecked, so constructors with huge initializer lists
				1544	may be missed.
				1545	Blank/comment lines are not counted so as to avoid encouraging the removal
				1546	of vertical space and commments just to get through a lint check.
				1547	NOLINT on the last line of a function disables this check.
				1548
				1549	Args:
				1550	filename: The name of the current file.
				1551	clean_lines: A CleansedLines instance containing the file.
				1552	linenum: The number of the line to check.
				1553	function_state: Current function name and lines in body so far.
				1554	error: The function to call with any errors found.
				1555	"""
				1556	lines = clean_lines.lines
				1557	line = lines[linenum]
				1558	raw = clean_lines.raw_lines
				1559	raw_line = raw[linenum]
				1560	joined_line = ''
				1561
				1562	starting_func = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1563	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1564	match_result = Match(regexp, line)
				1565	if match_result:
				1566	# If the name is all caps and underscores, figure it's a macro and
				1567	# ignore it, unless it's TEST or TEST_F.
				1568	function_name = match_result.group(1).split()[-1]
				1569	if function_name == 'TEST' or function_name == 'TEST_F' or (
				1570	not Match(r'[A-Z_]+$', function_name)):
				1571	starting_func = True
				1572
				1573	if starting_func:
				1574	body_found = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1575	for start_linenum in xrange(linenum, clean_lines.NumLines()):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1576	start_line = lines[start_linenum]
				1577	joined_line += ' ' + start_line.lstrip()
				1578	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				1579	body_found = True
				1580	break # ... ignore
				1581	elif Search(r'{', start_line):
				1582	body_found = True
				1583	function = Search(r'((\w\|:)*)\(', line).group(1)
				1584	if Match(r'TEST', function): # Handle TEST... macros
				1585	parameter_regexp = Search(r'($.*$)', joined_line)
				1586	if parameter_regexp: # Ignore bad syntax
				1587	function += parameter_regexp.group(1)
				1588	else:
				1589	function += '()'
				1590	function_state.Begin(function)
				1591	break
				1592	if not body_found:
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1593	# No body for the function (or evidence of a non-function) was found.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1594	error(filename, linenum, 'readability/fn_size', 5,
				1595	'Lint failed to find start of function body.')
				1596	elif Match(r'^\}\s*$', line): # function end
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1597	function_state.Check(error, filename, linenum)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1598	function_state.End()
				1599	elif not Match(r'^\s*$', line):
				1600	function_state.Count() # Count non-blank/non-comment lines.
				1601
				1602
				1603	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				1604
				1605
				1606	def CheckComment(comment, filename, linenum, error):
				1607	"""Checks for common mistakes in TODO comments.
				1608
				1609	Args:
				1610	comment: The text of the comment from the line in question.
				1611	filename: The name of the current file.
				1612	linenum: The number of the line to check.
				1613	error: The function to call with any errors found.
				1614	"""
				1615	match = _RE_PATTERN_TODO.match(comment)
				1616	if match:
				1617	# One whitespace is correct; zero whitespace is handled elsewhere.
				1618	leading_whitespace = match.group(1)
				1619	if len(leading_whitespace) > 1:
				1620	error(filename, linenum, 'whitespace/todo', 2,
				1621	'Too many spaces before TODO')
				1622
				1623	username = match.group(2)
				1624	if not username:
				1625	error(filename, linenum, 'readability/todo', 2,
				1626	'Missing username in TODO; it should look like '
				1627	'"// TODO(my_username): Stuff."')
				1628
				1629	middle_whitespace = match.group(3)
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1630	# Comparisons made explicit for correctness -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1631	if middle_whitespace != ' ' and middle_whitespace != '':
				1632	error(filename, linenum, 'whitespace/todo', 2,
				1633	'TODO(my_username) should be followed by a space')
				1634
				1635
				1636	def CheckSpacing(filename, clean_lines, linenum, error):
				1637	"""Checks for the correctness of various spacing issues in the code.
				1638
				1639	Things we check for: spaces around operators, spaces after
				1640	if/for/while/switch, no spaces around parens in function calls, two
				1641	spaces between code and comment, don't start a block with a blank
				1642	line, don't end a function with a blank line, don't have too many
				1643	blank lines in a row.
				1644
				1645	Args:
				1646	filename: The name of the current file.
				1647	clean_lines: A CleansedLines instance containing the file.
				1648	linenum: The number of the line to check.
				1649	error: The function to call with any errors found.
				1650	"""
				1651
				1652	raw = clean_lines.raw_lines
				1653	line = raw[linenum]
				1654
				1655	# Before nixing comments, check if the line is blank for no good
				1656	# reason. This includes the first line after a block is opened, and
				1657	# blank lines at the end of a function (ie, right before a line like '}'
				1658	if IsBlankLine(line):
				1659	elided = clean_lines.elided
				1660	prev_line = elided[linenum - 1]
				1661	prevbrace = prev_line.rfind('{')
				1662	# TODO(unknown): Don't complain if line before blank line, and line after,
				1663	# both start with alnums and are indented the same amount.
				1664	# This ignores whitespace at the start of a namespace block
				1665	# because those are not usually indented.
				1666	if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
				1667	and prev_line[:prevbrace].find('namespace') == -1):
				1668	# OK, we have a blank line at the start of a code block. Before we
				1669	# complain, we check if it is an exception to the rule: The previous
				1670	# non-empty line has the paramters of a function header that are indented
				1671	# 4 spaces (because they did not fit in a 80 column line when placed on
				1672	# the same line as the function name). We also check for the case where
				1673	# the previous line is indented 6 spaces, which may happen when the
				1674	# initializers of a constructor do not fit into a 80 column line.
				1675	exception = False
				1676	if Match(r' {6}\w', prev_line): # Initializer list?
				1677	# We are looking for the opening column of initializer list, which
				1678	# should be indented 4 spaces to cause 6 space indentation afterwards.
				1679	search_position = linenum-2
				1680	while (search_position >= 0
				1681	and Match(r' {6}\w', elided[search_position])):
				1682	search_position -= 1
				1683	exception = (search_position >= 0
				1684	and elided[search_position][:5] == ' :')
				1685	else:
				1686	# Search for the function arguments or an initializer list. We use a
				1687	# simple heuristic here: If the line is indented 4 spaces; and we have a
				1688	# closing paren, without the opening paren, followed by an opening brace
				1689	# or colon (for initializer lists) we assume that it is the last line of
				1690	# a function header. If we have a colon indented 4 spaces, it is an
				1691	# initializer list.
				1692	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				1693	prev_line)
				1694	or Match(r' {4}:', prev_line))
				1695
				1696	if not exception:
				1697	error(filename, linenum, 'whitespace/blank_line', 2,
				1698	'Blank line at the start of a code block. Is this needed?')
				1699	# This doesn't ignore whitespace at the end of a namespace block
				1700	# because that is too hard without pairing open/close braces;
				1701	# however, a special exception is made for namespace closing
				1702	# brackets which have a comment containing "namespace".
				1703	#
				1704	# Also, ignore blank lines at the end of a block in a long if-else
				1705	# chain, like this:
				1706	# if (condition1) {
				1707	# // Something followed by a blank line
				1708	#
				1709	# } else if (condition2) {
				1710	# // Something else
				1711	# }
				1712	if linenum + 1 < clean_lines.NumLines():
				1713	next_line = raw[linenum + 1]
				1714	if (next_line
				1715	and Match(r'\s*}', next_line)
				1716	and next_line.find('namespace') == -1
				1717	and next_line.find('} else ') == -1):
				1718	error(filename, linenum, 'whitespace/blank_line', 3,
				1719	'Blank line at the end of a code block. Is this needed?')
				1720
				1721	# Next, we complain if there's a comment too near the text
				1722	commentpos = line.find('//')
				1723	if commentpos != -1:
				1724	# Check if the // may be in quotes. If so, ignore it
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1725	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1726	if (line.count('"', 0, commentpos) -
				1727	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				1728	# Allow one space for new scopes, two spaces otherwise:
				1729	if (not Match(r'^\s*{ //', line) and
				1730	((commentpos >= 1 and
				1731	line[commentpos-1] not in string.whitespace) or
				1732	(commentpos >= 2 and
				1733	line[commentpos-2] not in string.whitespace))):
				1734	error(filename, linenum, 'whitespace/comments', 2,
				1735	'At least two spaces is best between code and comments')
				1736	# There should always be a space between the // and the comment
				1737	commentend = commentpos + 2
				1738	if commentend < len(line) and not line[commentend] == ' ':
				1739	# but some lines are exceptions -- e.g. if they're big
				1740	# comment delimiters like:
				1741	# //----------------------------------------------------------
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	1742	# or are an empty C++ style Doxygen comment, like:
				1743	# ///
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1744	# or they begin with multiple slashes followed by a space:
				1745	# //////// Header comment
				1746	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	1747	Search(r'^/$', line[commentend:]) or
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1748	Search(r'^/+ ', line[commentend:]))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1749	if not match:
				1750	error(filename, linenum, 'whitespace/comments', 4,
				1751	'Should have a space between // and comment')
				1752	CheckComment(line[commentpos:], filename, linenum, error)
				1753
				1754	line = clean_lines.elided[linenum] # get rid of comments and strings
				1755
				1756	# Don't try to do spacing checks for operator methods
				1757	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				1758
				1759	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				1760	# Otherwise not. Note we only check for non-spaces on both sides;
				1761	# sometimes people put non-spaces on one side when aligning ='s among
				1762	# many lines (not that this is behavior that I approve of...)
				1763	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				1764	error(filename, linenum, 'whitespace/operators', 4,
				1765	'Missing spaces around =')
				1766
				1767	# It's ok not to have spaces around binary operators like + - * /, but if
				1768	# there's too little whitespace, we get concerned. It's hard to tell,
				1769	# though, so we punt on this one for now. TODO.
				1770
				1771	# You should always have whitespace around binary operators.
				1772	# Alas, we can't test < or > because they're legitimately used sans spaces
				1773	# (a->b, vector<int> a). The only time we can tell is a < with no >, and
				1774	# only if it's not template params list spilling into the next line.
				1775	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
				1776	if not match:
				1777	# Note that while it seems that the '<[^<]*' term in the following
				1778	# regexp could be simplified to '<.*', which would indeed match
				1779	# the same class of strings, the [^<] means that searching for the
				1780	# regexp takes linear rather than quadratic time.
				1781	if not Search(r'<[^<],\s$', line): # template params spill
				1782	match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]\|->)*$', line)
				1783	if match:
				1784	error(filename, linenum, 'whitespace/operators', 3,
				1785	'Missing spaces around %s' % match.group(1))
				1786	# We allow no-spaces around << and >> when used like this: 10<<20, but
				1787	# not otherwise (particularly, not when used as streams)
				1788	match = Search(r'[^0-9\s](<<\|>>)[^0-9\s]', line)
				1789	if match:
				1790	error(filename, linenum, 'whitespace/operators', 3,
				1791	'Missing spaces around %s' % match.group(1))
				1792
				1793	# There shouldn't be space around unary operators
				1794	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				1795	if match:
				1796	error(filename, linenum, 'whitespace/operators', 4,
				1797	'Extra space for operator %s' % match.group(1))
				1798
				1799	# A pet peeve of mine: no spaces after an if, while, switch, or for
				1800	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				1801	if match:
				1802	error(filename, linenum, 'whitespace/parens', 5,
				1803	'Missing space before ( in %s' % match.group(1))
				1804
				1805	# For if/for/while/switch, the left and right parens should be
				1806	# consistent about how many spaces are inside the parens, and
				1807	# there should either be zero or one spaces inside the parens.
				1808	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1809	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1810	match = Search(r'\b(if\|for\|while\|switch)\s*'
				1811	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				1812	line)
				1813	if match:
				1814	if len(match.group(2)) != len(match.group(4)):
				1815	if not (match.group(3) == ';' and
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1816	len(match.group(2)) == 1 + len(match.group(4)) or
				1817	not match.group(2) and Search(r'\bfor\s$.; $', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1818	error(filename, linenum, 'whitespace/parens', 5,
				1819	'Mismatching spaces inside () in %s' % match.group(1))
				1820	if not len(match.group(2)) in [0, 1]:
				1821	error(filename, linenum, 'whitespace/parens', 5,
				1822	'Should have zero or one spaces inside ( and ) in %s' %
				1823	match.group(1))
				1824
				1825	# You should always have a space after a comma (either as fn arg or operator)
				1826	if Search(r',[^\s]', line):
				1827	error(filename, linenum, 'whitespace/comma', 3,
				1828	'Missing space after ,')
				1829
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	1830	# You should always have a space after a semicolon
				1831	# except for few corner cases
				1832	# TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
				1833	# space after ;
				1834	if Search(r';[^\s};\\)/]', line):
				1835	error(filename, linenum, 'whitespace/semicolon', 3,
				1836	'Missing space after ;')
				1837
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1838	# Next we will look for issues with function calls.
				1839	CheckSpacingForFunctionCall(filename, line, linenum, error)
				1840
				1841	# Except after an opening paren, you should have spaces before your braces.
				1842	# And since you should never have braces at the beginning of a line, this is
				1843	# an easy test.
				1844	if Search(r'[^ (]{', line):
				1845	error(filename, linenum, 'whitespace/braces', 5,
				1846	'Missing space before {')
				1847
				1848	# Make sure '} else {' has spaces.
				1849	if Search(r'}else', line):
				1850	error(filename, linenum, 'whitespace/braces', 5,
				1851	'Missing space before else')
				1852
				1853	# You shouldn't have spaces before your brackets, except maybe after
				1854	# 'delete []' or 'new char * []'.
				1855	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				1856	error(filename, linenum, 'whitespace/braces', 5,
				1857	'Extra space before [')
				1858
				1859	# You shouldn't have a space before a semicolon at the end of the line.
				1860	# There's a special case for "for" since the style guide allows space before
				1861	# the semicolon there.
				1862	if Search(r':\s;\s$', line):
				1863	error(filename, linenum, 'whitespace/semicolon', 5,
				1864	'Semicolon defining empty statement. Use { } instead.')
				1865	elif Search(r'^\s;\s$', line):
				1866	error(filename, linenum, 'whitespace/semicolon', 5,
				1867	'Line contains only semicolon. If this should be an empty statement, '
				1868	'use { } instead.')
				1869	elif (Search(r'\s+;\s*$', line) and
				1870	not Search(r'\bfor\b', line)):
				1871	error(filename, linenum, 'whitespace/semicolon', 5,
				1872	'Extra space before last semicolon. If this should be an empty '
				1873	'statement, use { } instead.')
				1874
				1875
				1876	def GetPreviousNonBlankLine(clean_lines, linenum):
				1877	"""Return the most recent non-blank line and its line number.
				1878
				1879	Args:
				1880	clean_lines: A CleansedLines instance containing the file contents.
				1881	linenum: The number of the line to check.
				1882
				1883	Returns:
				1884	A tuple with two elements. The first element is the contents of the last
				1885	non-blank line before the current line, or the empty string if this is the
				1886	first non-blank line. The second is the line number of that line, or -1
				1887	if this is the first non-blank line.
				1888	"""
				1889
				1890	prevlinenum = linenum - 1
				1891	while prevlinenum >= 0:
				1892	prevline = clean_lines.elided[prevlinenum]
				1893	if not IsBlankLine(prevline): # if not a blank line...
				1894	return (prevline, prevlinenum)
				1895	prevlinenum -= 1
				1896	return ('', -1)
				1897
				1898
				1899	def CheckBraces(filename, clean_lines, linenum, error):
				1900	"""Looks for misplaced braces (e.g. at the end of line).
				1901
				1902	Args:
				1903	filename: The name of the current file.
				1904	clean_lines: A CleansedLines instance containing the file.
				1905	linenum: The number of the line to check.
				1906	error: The function to call with any errors found.
				1907	"""
				1908
				1909	line = clean_lines.elided[linenum] # get rid of comments and strings
				1910
				1911	if Match(r'\s{\s$', line):
				1912	# We allow an open brace to start a line in the case where someone
				1913	# is using braces in a block to explicitly create a new scope,
				1914	# which is commonly used to control the lifetime of
				1915	# stack-allocated variables. We don't detect this perfectly: we
				1916	# just don't complain if the last non-whitespace character on the
				1917	# previous non-blank line is ';', ':', '{', or '}'.
				1918	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1919	if not Search(r'[;:}{]\s*$', prevline):
				1920	error(filename, linenum, 'whitespace/braces', 4,
				1921	'{ should almost always be at the end of the previous line')
				1922
				1923	# An else clause should be on the same line as the preceding closing brace.
				1924	if Match(r'\selse\s', line):
				1925	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1926	if Match(r'\s}\s$', prevline):
				1927	error(filename, linenum, 'whitespace/newline', 4,
				1928	'An else should appear on the same line as the preceding }')
				1929
				1930	# If braces come on one side of an else, they should be on both.
				1931	# However, we have to worry about "else if" that spans multiple lines!
				1932	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				1933	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				1934	# find the ( after the if
				1935	pos = line.find('else if')
				1936	pos = line.find('(', pos)
				1937	if pos > 0:
				1938	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				1939	if endline[endpos:].find('{') == -1: # must be brace after if
				1940	error(filename, linenum, 'readability/braces', 5,
				1941	'If an else has a brace on one side, it should have it on both')
				1942	else: # common case: else not followed by a multi-line if
				1943	error(filename, linenum, 'readability/braces', 5,
				1944	'If an else has a brace on one side, it should have it on both')
				1945
				1946	# Likewise, an else should never have the else clause on the same line
				1947	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				1948	error(filename, linenum, 'whitespace/newline', 4,
				1949	'Else clause should never be on same line as else (use 2 lines)')
				1950
				1951	# In the same way, a do/while should never be on one line
				1952	if Match(r'\s*do [^\s{]', line):
				1953	error(filename, linenum, 'whitespace/newline', 4,
				1954	'do/while clauses should not be on a single line')
				1955
				1956	# Braces shouldn't be followed by a ; unless they're defining a struct
				1957	# or initializing an array.
				1958	# We can't tell in general, but we can for some common cases.
				1959	prevlinenum = linenum
				1960	while True:
				1961	(prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
				1962	if Match(r'\s+{.}\s;', line) and not prevline.count(';'):
				1963	line = prevline + line
				1964	else:
				1965	break
				1966	if (Search(r'{.}\s;', line) and
				1967	line.count('{') == line.count('}') and
				1968	not Search(r'struct\|class\|enum\|\s=\s{', line)):
				1969	error(filename, linenum, 'readability/braces', 4,
				1970	"You don't need a ; after a }")
				1971
				1972
				1973	def ReplaceableCheck(operator, macro, line):
				1974	"""Determine whether a basic CHECK can be replaced with a more specific one.
				1975
				1976	For example suggest using CHECK_EQ instead of CHECK(a == b) and
				1977	similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
				1978
				1979	Args:
				1980	operator: The C++ operator used in the CHECK.
				1981	macro: The CHECK or EXPECT macro being called.
				1982	line: The current source line.
				1983
				1984	Returns:
				1985	True if the CHECK can be replaced with a more specific one.
				1986	"""
				1987
				1988	# This matches decimal and hex integers, strings, and chars (in that order).
				1989	match_constant = r'([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')'
				1990
				1991	# Expression to match two sides of the operator with something that
				1992	# looks like a literal, since CHECK(x == iterator) won't compile.
				1993	# This means we can't catch all the cases where a more specific
				1994	# CHECK is possible, but it's less annoying than dealing with
				1995	# extraneous warnings.
				1996	match_this = (r'\s' + macro + r'\((\s' +
				1997	match_constant + r'\s' + operator + r'[^<>].\|'
				1998	r'.[^<>]' + operator + r'\s' + match_constant +
				1999	r'\s*\))')
				2000
				2001	# Don't complain about CHECK(x == NULL) or similar because
				2002	# CHECK_EQ(x, NULL) won't compile (requires a cast).
				2003	# Also, don't complain about more complex boolean expressions
				2004	# involving && or \|\| such as CHECK(a == b \|\| c == d).
				2005	return Match(match_this, line) and not Search(r'NULL\|&&\|\\|\\|', line)
				2006
				2007
				2008	def CheckCheck(filename, clean_lines, linenum, error):
				2009	"""Checks the use of CHECK and EXPECT macros.
				2010
				2011	Args:
				2012	filename: The name of the current file.
				2013	clean_lines: A CleansedLines instance containing the file.
				2014	linenum: The number of the line to check.
				2015	error: The function to call with any errors found.
				2016	"""
				2017
				2018	# Decide the set of replacement macros that should be suggested
				2019	raw_lines = clean_lines.raw_lines
				2020	current_macro = ''
				2021	for macro in _CHECK_MACROS:
				2022	if raw_lines[linenum].find(macro) >= 0:
				2023	current_macro = macro
				2024	break
				2025	if not current_macro:
				2026	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				2027	return
				2028
				2029	line = clean_lines.elided[linenum] # get rid of comments and strings
				2030
				2031	# Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
				2032	for operator in ['==', '!=', '>=', '>', '<=', '<']:
				2033	if ReplaceableCheck(operator, current_macro, line):
				2034	error(filename, linenum, 'readability/check', 2,
				2035	'Consider using %s instead of %s(a %s b)' % (
				2036	_CHECK_REPLACEMENT[current_macro][operator],
				2037	current_macro, operator))
				2038	break
				2039
				2040
				2041	def GetLineWidth(line):
				2042	"""Determines the width of the line in column positions.
				2043
				2044	Args:
				2045	line: A string, which may be a Unicode string.
				2046
				2047	Returns:
				2048	The width of the line in column positions, accounting for Unicode
				2049	combining characters and wide characters.
				2050	"""
				2051	if isinstance(line, unicode):
				2052	width = 0
				2053	for c in unicodedata.normalize('NFC', line):
				2054	if unicodedata.east_asian_width(c) in ('W', 'F'):
				2055	width += 2
				2056	elif not unicodedata.combining(c):
				2057	width += 1
				2058	return width
				2059	else:
				2060	return len(line)
				2061
				2062
				2063	def CheckStyle(filename, clean_lines, linenum, file_extension, error):
				2064	"""Checks rules from the 'C++ style rules' section of cppguide.html.
				2065
				2066	Most of these rules are hard to test (naming, comment style), but we
				2067	do what we can. In particular we check for 2-space indents, line lengths,
				2068	tab usage, spaces inside code, etc.
				2069
				2070	Args:
				2071	filename: The name of the current file.
				2072	clean_lines: A CleansedLines instance containing the file.
				2073	linenum: The number of the line to check.
				2074	file_extension: The extension (without the dot) of the filename.
				2075	error: The function to call with any errors found.
				2076	"""
				2077
				2078	raw_lines = clean_lines.raw_lines
				2079	line = raw_lines[linenum]
				2080
				2081	if line.find('\t') != -1:
				2082	error(filename, linenum, 'whitespace/tab', 1,
				2083	'Tab found; better to use spaces')
				2084
				2085	# One or three blank spaces at the beginning of the line is weird; it's
				2086	# hard to reconcile that with 2-space indents.
				2087	# NOTE: here are the conditions rob pike used for his tests. Mine aren't
				2088	# as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
				2089	# if(RLENGTH > 20) complain = 0;
				2090	# if(match($0, " +(error\|private\|public\|protected):")) complain = 0;
				2091	# if(match(prev, "&& *$")) complain = 0;
				2092	# if(match(prev, "\\\|\\\| *$")) complain = 0;
				2093	# if(match(prev, "[\",=><] *$")) complain = 0;
				2094	# if(match($0, " <<")) complain = 0;
				2095	# if(match(prev, " +for \\(")) complain = 0;
				2096	# if(prevodd && match(prevprev, " +for \\(")) complain = 0;
				2097	initial_spaces = 0
				2098	cleansed_line = clean_lines.elided[linenum]
				2099	while initial_spaces < len(line) and line[initial_spaces] == ' ':
				2100	initial_spaces += 1
				2101	if line and line[-1].isspace():
				2102	error(filename, linenum, 'whitespace/end_of_line', 4,
				2103	'Line ends in whitespace. Consider deleting these extra spaces.')
				2104	# There are certain situations we allow one space, notably for labels
				2105	elif ((initial_spaces == 1 or initial_spaces == 3) and
				2106	not Match(r'\s\w+\s:\s*$', cleansed_line)):
				2107	error(filename, linenum, 'whitespace/indent', 3,
				2108	'Weird number of spaces at line-start. '
				2109	'Are you using a 2-space indent?')
				2110	# Labels should always be indented at least one space.
				2111	elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
				2112	line):
				2113	error(filename, linenum, 'whitespace/labels', 4,
				2114	'Labels should always be indented at least one space. '
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2115	'If this is a member-initializer list in a constructor or '
				2116	'the base class list in a class definition, the colon should '
				2117	'be on the following line.')
				2118
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2119
				2120	# Check if the line is a header guard.
				2121	is_header_guard = False
				2122	if file_extension == 'h':
				2123	cppvar = GetHeaderGuardCPPVariable(filename)
				2124	if (line.startswith('#ifndef %s' % cppvar) or
				2125	line.startswith('#define %s' % cppvar) or
				2126	line.startswith('#endif // %s' % cppvar)):
				2127	is_header_guard = True
				2128	# #include lines and header guards can be long, since there's no clean way to
				2129	# split them.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2130	#
				2131	# URLs can be long too. It's possible to split these, but it makes them
				2132	# harder to cut&paste.
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2133	#
				2134	# The "$Id:...$" comment may also get very long without it being the
				2135	# developers fault.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2136	if (not line.startswith('#include') and not is_header_guard and
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2137	not Match(r'^\s//.http(s?)://\S*$', line) and
				2138	not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2139	line_width = GetLineWidth(line)
				2140	if line_width > 100:
				2141	error(filename, linenum, 'whitespace/line_length', 4,
				2142	'Lines should very rarely be longer than 100 characters')
				2143	elif line_width > 80:
				2144	error(filename, linenum, 'whitespace/line_length', 2,
				2145	'Lines should be <= 80 characters long')
				2146
				2147	if (cleansed_line.count(';') > 1 and
				2148	# for loops are allowed two ;'s (and may run over two lines).
				2149	cleansed_line.find('for') == -1 and
				2150	(GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
				2151	GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
				2152	# It's ok to have many commands in a switch case that fits in 1 line
				2153	not ((cleansed_line.find('case ') != -1 or
				2154	cleansed_line.find('default:') != -1) and
				2155	cleansed_line.find('break;') != -1)):
				2156	error(filename, linenum, 'whitespace/newline', 4,
				2157	'More than one command on the same line')
				2158
				2159	# Some more style checks
				2160	CheckBraces(filename, clean_lines, linenum, error)
				2161	CheckSpacing(filename, clean_lines, linenum, error)
				2162	CheckCheck(filename, clean_lines, linenum, error)
				2163
				2164
				2165	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				2166	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				2167	# Matches the first component of a filename delimited by -s and _s. That is:
				2168	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				2169	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				2170	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				2171	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				2172	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				2173
				2174
				2175	def _DropCommonSuffixes(filename):
				2176	"""Drops common suffixes like _test.cc or -inl.h from filename.
				2177
				2178	For example:
				2179	>>> _DropCommonSuffixes('foo/foo-inl.h')
				2180	'foo/foo'
				2181	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				2182	'foo/bar/foo'
				2183	>>> _DropCommonSuffixes('foo/foo_internal.h')
				2184	'foo/foo'
				2185	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				2186	'foo/foo_unusualinternal'
				2187
				2188	Args:
				2189	filename: The input filename.
				2190
				2191	Returns:
				2192	The filename with the common suffix removed.
				2193	"""
				2194	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				2195	'inl.h', 'impl.h', 'internal.h'):
				2196	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				2197	filename[-len(suffix) - 1] in ('-', '_')):
				2198	return filename[:-len(suffix) - 1]
				2199	return os.path.splitext(filename)[0]
				2200
				2201
				2202	def _IsTestFilename(filename):
				2203	"""Determines if the given filename has a suffix that identifies it as a test.
				2204
				2205	Args:
				2206	filename: The input filename.
				2207
				2208	Returns:
				2209	True if 'filename' looks like a test, False otherwise.
				2210	"""
				2211	if (filename.endswith('_test.cc') or
				2212	filename.endswith('_unittest.cc') or
				2213	filename.endswith('_regtest.cc')):
				2214	return True
				2215	else:
				2216	return False
				2217
				2218
				2219	def _ClassifyInclude(fileinfo, include, is_system):
				2220	"""Figures out what kind of header 'include' is.
				2221
				2222	Args:
				2223	fileinfo: The current file cpplint is running over. A FileInfo instance.
				2224	include: The path to a #included file.
				2225	is_system: True if the #include used <> rather than "".
				2226
				2227	Returns:
				2228	One of the _XXX_HEADER constants.
				2229
				2230	For example:
				2231	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				2232	_C_SYS_HEADER
				2233	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				2234	_CPP_SYS_HEADER
				2235	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				2236	_LIKELY_MY_HEADER
				2237	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				2238	... 'bar/foo_other_ext.h', False)
				2239	_POSSIBLE_MY_HEADER
				2240	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				2241	_OTHER_HEADER
				2242	"""
				2243	# This is a list of all standard c++ header files, except
				2244	# those already checked for above.
				2245	is_stl_h = include in _STL_HEADERS
				2246	is_cpp_h = is_stl_h or include in _CPP_HEADERS
				2247
				2248	if is_system:
				2249	if is_cpp_h:
				2250	return _CPP_SYS_HEADER
				2251	else:
				2252	return _C_SYS_HEADER
				2253
				2254	# If the target file and the include we're checking share a
				2255	# basename when we drop common extensions, and the include
				2256	# lives in . , then it's likely to be owned by the target file.
				2257	target_dir, target_base = (
				2258	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				2259	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				2260	if target_base == include_base and (
				2261	include_dir == target_dir or
				2262	include_dir == os.path.normpath(target_dir + '/../public')):
				2263	return _LIKELY_MY_HEADER
				2264
				2265	# If the target and include share some initial basename
				2266	# component, it's possible the target is implementing the
				2267	# include, so it's allowed to be first, but we'll never
				2268	# complain if it's not there.
				2269	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				2270	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				2271	if (target_first_component and include_first_component and
				2272	target_first_component.group(0) ==
				2273	include_first_component.group(0)):
				2274	return _POSSIBLE_MY_HEADER
				2275
				2276	return _OTHER_HEADER
				2277
				2278
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2279
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # _RE_PATTERN_INCLUDE is anchored at the start of the line, so a single
  # search serves both the duplicate/order checks and the stream check.
  match = _RE_PATTERN_INCLUDE.search(line)
  if not match:
    return

  include = match.group(2)
  is_system = (match.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    error_message = include_state.CheckNextIncludeOrder(
        _ClassifyInclude(fileinfo, include, is_system))
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    if not include_state.IsInAlphabeticalOrder(include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
    # Many unit tests use cout, so we exempt them.
    if not _IsTestFilename(filename):
      error(filename, linenum, 'readability/streams', 3,
            'Streams are highly discouraged.')
				2347
				2348	def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
				2349	error):
				2350	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				2351
				2352	Some of these rules are hard to test (function overloading, using
				2353	uint32 inappropriately), but we do the best we can.
				2354
				2355	Args:
				2356	filename: The name of the current file.
				2357	clean_lines: A CleansedLines instance containing the file.
				2358	linenum: The number of the line to check.
				2359	file_extension: The extension (without the dot) of the filename.
				2360	include_state: An _IncludeState instance in which the headers are inserted.
				2361	error: The function to call with any errors found.
				2362	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2363	# If the line is empty or consists of entirely a comment, no need to
				2364	# check it.
				2365	line = clean_lines.elided[linenum]
				2366	if not line:
				2367	return
				2368
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2369	match = _RE_PATTERN_INCLUDE.search(line)
				2370	if match:
				2371	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				2372	return
				2373
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2374	# Create an extended_line, which is the concatenation of the current and
				2375	# next lines, for more effective checking of code that may span more than one
				2376	# line.
				2377	if linenum + 1 < clean_lines.NumLines():
				2378	extended_line = line + clean_lines.elided[linenum + 1]
				2379	else:
				2380	extended_line = line
				2381
				2382	# Make Windows paths like Unix.
				2383	fullname = os.path.abspath(filename).replace('\\', '/')
				2384
				2385	# TODO(unknown): figure out if they're using default arguments in fn proto.
				2386
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2387	# Check for non-const references in functions. This is tricky because &
				2388	# is also used to take the address of something. We allow <> for templates,
				2389	# (ignoring whatever is between the braces) and : for classes.
				2390	# These are complicated re's. They try to capture the following:
				2391	# paren (for fn-prototype start), typename, &, varname. For the const
				2392	# version, we're willing for const to be before typename or after
				2393	# Don't check the implemention on same line.
				2394	fnline = line.split('{', 1)[0]
				2395	if (len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+(\s?&\|&\s?)\w+', fnline)) >
				2396	len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
				2397	r'(?:[\w:]\|<[^()]*>)+(\s?&\|&\s?)\w+', fnline)) +
				2398	len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+\s+const(\s?&\|&\s?)[\w]+',
				2399	fnline))):
				2400
				2401	# We allow non-const references in a few standard places, like functions
				2402	# called "swap()" or iostream operators like "<<" or ">>".
				2403	if not Search(
				2404	r'(swap\|Swap\|operator[<>][<>])\s\(\s(?:[\w:]\|<.>)+\s&',
				2405	fnline):
				2406	error(filename, linenum, 'runtime/references', 2,
				2407	'Is this a non-const reference? '
				2408	'If so, make const or use a pointer.')
				2409
				2410	# Check to see if they're using an conversion function cast.
				2411	# I just try to capture the most common basic types, though there are more.
				2412	# Parameterless conversion functions, such as bool(), are allowed as they are
				2413	# probably a member operator declaration or default constructor.
				2414	match = Search(
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2415	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
				2416	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)\([^)]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2417	if match:
				2418	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				2419	# where type may be float(), int(string), etc. Without context they are
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2420	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				2421	# MockCallback takes a template parameter of the form return_type(arg_type),
				2422	# which looks much like the cast we're trying to detect.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2423	if (match.group(1) is None and # If new operator, then this isn't a cast
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2424	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
				2425	Match(r'^\sMockCallback<.>', line))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2426	error(filename, linenum, 'readability/casting', 4,
				2427	'Using deprecated casting style. '
				2428	'Use static_cast<%s>(...) instead' %
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2429	match.group(2))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2430
				2431	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2432	'static_cast',
				2433	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$',
				2434	error)
				2435	# This doesn't catch all cases. Consider (const char * const)"hello".
				2436	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2437	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
				2438
				2439	# In addition, we look for people taking the address of a cast. This
				2440	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				2441	# point where you think.
				2442	if Search(
				2443	r'(&$[^)]+$[\w(])\|(&(static\|dynamic\|reinterpret)_cast\b)', line):
				2444	error(filename, linenum, 'runtime/casting', 4,
				2445	('Are you taking an address of a cast? '
				2446	'This is dangerous: could be a temp var. '
				2447	'Take the address before doing the cast, rather than after'))
				2448
				2449	# Check for people declaring static/global STL strings at the top level.
				2450	# This is dangerous because the C++ language does not guarantee that
				2451	# globals with constructors are initialized before the first access.
				2452	match = Match(
				2453	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				2454	line)
				2455	# Make sure it's not a function.
				2456	# Function template specialization looks like: "string foo<Type>(...".
				2457	# Class template definitions look like: "string Foo<Type>::Method(...".
				2458	if match and not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)',
				2459	match.group(3)):
				2460	error(filename, linenum, 'runtime/string', 4,
				2461	'For a static/global string constant, use a C style string instead: '
				2462	'"%schar %s[]".' %
				2463	(match.group(1), match.group(2)))
				2464
				2465	# Check that we're not using RTTI outside of testing code.
				2466	if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
				2467	error(filename, linenum, 'runtime/rtti', 5,
				2468	'Do not use dynamic_cast<>. If you need to cast within a class '
				2469	"hierarchy, use static_cast<> to upcast. Google doesn't support "
				2470	'RTTI.')
				2471
				2472	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				2473	error(filename, linenum, 'runtime/init', 4,
				2474	'You seem to be initializing a member variable with itself.')
				2475
				2476	if file_extension == 'h':
				2477	# TODO(unknown): check that 1-arg constructors are explicit.
				2478	# How to tell it's a constructor?
				2479	# (handled in CheckForNonStandardConstructs for now)
				2480	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				2481	# (level 1 error)
				2482	pass
				2483
				2484	# Check if people are using the verboten C basic types. The only exception
				2485	# we regularly allow is "unsigned short port" for port.
				2486	if Search(r'\bshort port\b', line):
				2487	if not Search(r'\bunsigned short port\b', line):
				2488	error(filename, linenum, 'runtime/int', 4,
				2489	'Use "unsigned short" for ports, not "short"')
				2490	else:
				2491	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				2492	if match:
				2493	error(filename, linenum, 'runtime/int', 4,
				2494	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				2495
				2496	# When snprintf is used, the second argument shouldn't be a literal.
				2497	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2498	if match and match.group(2) != '0':
				2499	# If 2nd arg is zero, snprintf is used to calculate size.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2500	error(filename, linenum, 'runtime/printf', 3,
				2501	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				2502	'to snprintf.' % (match.group(1), match.group(2)))
				2503
				2504	# Check if some verboten C functions are being used.
				2505	if Search(r'\bsprintf\b', line):
				2506	error(filename, linenum, 'runtime/printf', 5,
				2507	'Never use sprintf. Use snprintf instead.')
				2508	match = Search(r'\b(strcpy\|strcat)\b', line)
				2509	if match:
				2510	error(filename, linenum, 'runtime/printf', 4,
				2511	'Almost always, snprintf is better than %s' % match.group(1))
				2512
				2513	if Search(r'\bsscanf\b', line):
				2514	error(filename, linenum, 'runtime/printf', 1,
				2515	'sscanf can be ok, but is slow and can overflow buffers.')
				2516
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2517	# Check if some verboten operator overloading is going on
				2518	# TODO(unknown): catch out-of-line unary operator&:
				2519	# class X {};
				2520	# int operator&(const X& x) { return 42; } // unary operator&
				2521	# The trick is it's hard to tell apart from binary operator&:
				2522	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				2523	if Search(r'\boperator\s&\s$\s*$', line):
				2524	error(filename, linenum, 'runtime/operator', 4,
				2525	'Unary operator& is dangerous. Do not use it.')
				2526
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2527	# Check for suspicious usage of "if" like
				2528	# } if (a == b) {
				2529	if Search(r'\}\sif\s\(', line):
				2530	error(filename, linenum, 'readability/braces', 4,
				2531	'Did you mean "else if"? If not, start a new line for "if".')
				2532
				2533	# Check for potential format string bugs like printf(foo).
				2534	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				2535	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
				2536	match = re.search(r'\b((?:string)?printf)\s*$([\w.\->()]+)$', line, re.I)
				2537	if match:
				2538	error(filename, linenum, 'runtime/printf', 4,
				2539	'Potential format string bug. Do %s("%%s", %s) instead.'
				2540	% (match.group(1), match.group(2)))
				2541
				2542	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				2543	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				2544	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				2545	error(filename, linenum, 'runtime/memset', 4,
				2546	'Did you mean "memset(%s, 0, %s)"?'
				2547	% (match.group(1), match.group(2)))
				2548
				2549	if Search(r'\busing namespace\b', line):
				2550	error(filename, linenum, 'build/namespaces', 5,
				2551	'Do not use namespace using-directives. '
				2552	'Use using-declarations instead.')
				2553
				2554	# Detect variable-length arrays.
				2555	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				2556	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				2557	match.group(3).find(']') == -1):
				2558	# Split the size using space and arithmetic operators as delimiters.
				2559	# If any of the resulting tokens are not compile time constants then
				2560	# report the error.
				2561	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				2562	is_const = True
				2563	skip_next = False
				2564	for tok in tokens:
				2565	if skip_next:
				2566	skip_next = False
				2567	continue
				2568
				2569	if Search(r'sizeof$.+$', tok): continue
				2570	if Search(r'arraysize$\w+$', tok): continue
				2571
				2572	tok = tok.lstrip('(')
				2573	tok = tok.rstrip(')')
				2574	if not tok: continue
				2575	if Match(r'\d+', tok): continue
				2576	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				2577	if Match(r'k[A-Z0-9]\w*', tok): continue
				2578	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				2579	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				2580	# A catch all for tricky sizeof cases, including 'sizeof expression',
				2581	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
				2582	# requires skipping the next token becasue we split on ' ' and '*'.
				2583	if tok.startswith('sizeof'):
				2584	skip_next = True
				2585	continue
				2586	is_const = False
				2587	break
				2588	if not is_const:
				2589	error(filename, linenum, 'runtime/arrays', 1,
				2590	'Do not use variable-length arrays. Use an appropriately named '
				2591	"('k' followed by CamelCase) compile-time constant for the size.")
				2592
				2593	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				2594	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				2595	# in the class declaration.
				2596	match = Match(
				2597	(r'\s*'
				2598	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				2599	r'$.*$;$'),
				2600	line)
				2601	if match and linenum + 1 < clean_lines.NumLines():
				2602	next_line = clean_lines.elided[linenum + 1]
				2603	if not Search(r'^\s*};', next_line):
				2604	error(filename, linenum, 'readability/constructors', 3,
				2605	match.group(1) + ' should be the last thing in the class')
				2606
				2607	# Check for use of unnamed namespaces in header files. Registration
				2608	# macros are typically OK, so we allow use of "namespace {" on lines
				2609	# that end with backslashes.
				2610	if (file_extension == 'h'
				2611	and Search(r'\bnamespace\s*{', line)
				2612	and line[-1] != '\\'):
				2613	error(filename, linenum, 'build/namespaces', 4,
				2614	'Do not use unnamed namespaces in header files. See '
				2615	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				2616	' for more information.')
				2617
				2618
				2619	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				2620	error):
				2621	"""Checks for a C-style cast by looking for the pattern.
				2622
				2623	This also handles sizeof(type) warnings, due to similarity of content.
				2624
				2625	Args:
				2626	filename: The name of the current file.
				2627	linenum: The number of the line to check.
				2628	line: The line of code to check.
				2629	raw_line: The raw line of code to check, with comments.
				2630	cast_type: The string for the C++ cast to recommend. This is either
				2631	reinterpret_cast or static_cast, depending.
				2632	pattern: The regular expression used to find C-style casts.
				2633	error: The function to call with any errors found.
				2634	"""
				2635	match = Search(pattern, line)
				2636	if not match:
				2637	return
				2638
				2639	# e.g., sizeof(int)
				2640	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				2641	if sizeof_match:
				2642	error(filename, linenum, 'runtime/sizeof', 1,
				2643	'Using sizeof(type). Use sizeof(varname) instead if possible')
				2644	return
				2645
				2646	remainder = line[match.end(0):]
				2647
				2648	# The close paren is for function pointers as arguments to a function.
				2649	# eg, void foo(void (*bar)(int));
				2650	# The semicolon check is a more basic function check; also possibly a
				2651	# function pointer typedef.
				2652	# eg, void foo(int); or void foo(int) const;
				2653	# The equals check is for function pointer assignment.
				2654	# eg, void (foo)(int) = ...
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2655	# The > is for MockCallback<...> ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2656	#
				2657	# Right now, this will only catch cases where there's a single argument, and
				2658	# it's unnamed. It should probably be expanded to check for multiple
				2659	# arguments with some unnamed.
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2660	function_match = Match(r'\s(\)\|=\|(const)?\s(;\|\{\|throw\|>))', remainder)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2661	if function_match:
				2662	if (not function_match.group(3) or
				2663	function_match.group(3) == ';' or
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame^]	2664	('MockCallback<' not in raw_line and
				2665	'/*' not in raw_line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2666	error(filename, linenum, 'readability/function', 3,
				2667	'All parameters should be named in a function')
				2668	return
				2669
				2670	# At this point, all that should be left is actual casts.
				2671	error(filename, linenum, 'readability/casting', 4,
				2672	'Using C-style cast. Use %s<%s>(...) instead' %
				2673	(cast_type, match.group(1)))
				2674
				2675
				2676	_HEADERS_CONTAINING_TEMPLATES = (
				2677	('<deque>', ('deque',)),
				2678	('<functional>', ('unary_function', 'binary_function',
				2679	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				2680	'negate',
				2681	'equal_to', 'not_equal_to', 'greater', 'less',
				2682	'greater_equal', 'less_equal',
				2683	'logical_and', 'logical_or', 'logical_not',
				2684	'unary_negate', 'not1', 'binary_negate', 'not2',
				2685	'bind1st', 'bind2nd',
				2686	'pointer_to_unary_function',
				2687	'pointer_to_binary_function',
				2688	'ptr_fun',
				2689	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				2690	'mem_fun_ref_t',
				2691	'const_mem_fun_t', 'const_mem_fun1_t',
				2692	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				2693	'mem_fun_ref',
				2694	)),
				2695	('<limits>', ('numeric_limits',)),
				2696	('<list>', ('list',)),
				2697	('<map>', ('map', 'multimap',)),
				2698	('<memory>', ('allocator',)),
				2699	('<queue>', ('queue', 'priority_queue',)),
				2700	('<set>', ('set', 'multiset',)),
				2701	('<stack>', ('stack',)),
				2702	('<string>', ('char_traits', 'basic_string',)),
				2703	('<utility>', ('pair',)),
				2704	('<vector>', ('vector',)),
				2705
				2706	# gcc extensions.
				2707	# Note: std::hash is their hash, ::hash is our hash
				2708	('<hash_map>', ('hash_map', 'hash_multimap',)),
				2709	('<hash_set>', ('hash_set', 'hash_multiset',)),
				2710	('<slist>', ('slist',)),
				2711	)
				2712
				2713	_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
				2714	# We can trust with reasonable confidence that map gives us pair<>, too.
				2715	'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
				2716	}
				2717
				2718	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				2719
				2720	_re_pattern_algorithm_header = []
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2721	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				2722	'transform'):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2723	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				2724	# type::max().
				2725	_re_pattern_algorithm_header.append(
				2726	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				2727	_template,
				2728	'<algorithm>'))
				2729
				2730	_re_pattern_templates = []
				2731	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				2732	for _template in _templates:
				2733	_re_pattern_templates.append(
				2734	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				2735	_template + '<>',
				2736	_header))
				2737
				2738
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  # Strip at most one test suffix; '_unittest' takes precedence over '_test'.
  for test_suffix in ('_unittest', '_test'):
    if filename_cc.endswith(test_suffix):
      filename_cc = filename_cc[:-len(test_suffix)]
      break
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  files_belong_to_same_module = filename_cc.endswith(filename_h)
  common_path = ''
  if files_belong_to_same_module:
    # Slice by an explicit end index: a negative slice would collapse to ''
    # when the normalized header name is empty, because s[:-0] == ''.
    common_path = filename_cc[:len(filename_cc) - len(filename_h)]
  return files_belong_to_same_module, common_path
				2792
				2793
				2794	def UpdateIncludeState(filename, include_state, io=codecs):
				2795	"""Fill up the include_state with new includes found from the file.
				2796
				2797	Args:
				2798	filename: the name of the header to read.
				2799	include_state: an _IncludeState instance in which the headers are inserted.
				2800	io: The io factory to use to read the file. Provided for testability.
				2801
				2802	Returns:
				2803	True if a header was succesfully added. False otherwise.
				2804	"""
				2805	headerfile = None
				2806	try:
				2807	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				2808	except IOError:
				2809	return False
				2810	linenum = 0
				2811	for line in headerfile:
				2812	linenum += 1
				2813	clean_line = CleanseComments(line)
				2814	match = _RE_PATTERN_INCLUDE.search(clean_line)
				2815	if match:
				2816	include = match.group(2)
				2817	# The value formatting is cute, but not really used right now.
				2818	# What matters here is that the key is in include_state.
				2819	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				2820	return True
				2821
				2822
				2823	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				2824	io=codecs):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2825	"""Reports for missing stl includes.
				2826
				2827	This function will output warnings to make sure you are including the headers
				2828	necessary for the stl containers and functions that you use. We only give one
				2829	reason to include a header. For example, if you use both equal_to<> and
				2830	less<> in a .h file, only one (the latter in the file) of these will be
				2831	reported as a reason to include the <functional>.
				2832
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2833	Args:
				2834	filename: The name of the current file.
				2835	clean_lines: A CleansedLines instance containing the file.
				2836	include_state: An _IncludeState instance.
				2837	error: The function to call with any errors found.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2838	io: The IO factory to use to read the header file. Provided for unittest
				2839	injection.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2840	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2841	required = {} # A map of header name to linenumber and the template entity.
				2842	# Example of required: { '<functional>': (1219, 'less<>') }
				2843
				2844	for linenum in xrange(clean_lines.NumLines()):
				2845	line = clean_lines.elided[linenum]
				2846	if not line or line[0] == '#':
				2847	continue
				2848
				2849	# String is special -- it is a non-templatized type in STL.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2850	m = _RE_PATTERN_STRING.search(line)
				2851	if m:
				2852	# Don't warn about strings in non-STL namespaces:
				2853	# (We check only the first match per line; good enough.)
				2854	prefix = line[:m.start()]
				2855	if prefix.endswith('std::') or not prefix.endswith('::'):
				2856	required['<string>'] = (linenum, 'string')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2857
				2858	for pattern, template, header in _re_pattern_algorithm_header:
				2859	if pattern.search(line):
				2860	required[header] = (linenum, template)
				2861
				2862	# The following function is just a speed up, no semantics are changed.
				2863	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				2864	continue
				2865
				2866	for pattern, template, header in _re_pattern_templates:
				2867	if pattern.search(line):
				2868	required[header] = (linenum, template)
				2869
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2870	# The policy is that if you #include something in foo.h you don't need to
				2871	# include it again in foo.cc. Here, we will look at possible includes.
				2872	# Let's copy the include_state so it is only messed up within this function.
				2873	include_state = include_state.copy()
				2874
				2875	# Did we find the header for this file (if any) and succesfully load it?
				2876	header_found = False
				2877
				2878	# Use the absolute path so that matching works properly.
				2879	abs_filename = os.path.abspath(filename)
				2880
				2881	# For Emacs's flymake.
				2882	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				2883	# by flymake and that file name might end with '_flymake.cc'. In that case,
				2884	# restore original file name here so that the corresponding header file can be
				2885	# found.
				2886	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				2887	# instead of 'foo_flymake.h'
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2888	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2889
				2890	# include_state is modified during iteration, so we iterate over a copy of
				2891	# the keys.
				2892	for header in include_state.keys(): #NOLINT
				2893	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				2894	fullpath = common_path + header
				2895	if same_module and UpdateIncludeState(fullpath, include_state, io):
				2896	header_found = True
				2897
				2898	# If we can't find the header file for a .cc, assume it's because we don't
				2899	# know where to look. In that case we'll give up as we're not sure they
				2900	# didn't include it in the .h file.
				2901	# TODO(unknown): Do a better job of finding .h files so we are confident that
				2902	# not having the .h file means there isn't one.
				2903	if filename.endswith('.cc') and not header_found:
				2904	return
				2905
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2906	# All the lines have been processed, report the errors found.
				2907	for required_header_unstripped in required:
				2908	template = required[required_header_unstripped][1]
				2909	if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
				2910	headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
				2911	if [True for header in headers if header in include_state]:
				2912	continue
				2913	if required_header_unstripped.strip('<>"') not in include_state:
				2914	error(filename, required[required_header_unstripped][0],
				2915	'build/include_what_you_use', 4,
				2916	'Add #include ' + required_header_unstripped + ' for ' + template)
				2917
				2918
				2919	def ProcessLine(filename, file_extension,
				2920	clean_lines, line, include_state, function_state,
				2921	class_state, error):
				2922	"""Processes a single line in the file.
				2923
				2924	Args:
				2925	filename: Filename of the file that is being processed.
				2926	file_extension: The extension (dot not included) of the file.
				2927	clean_lines: An array of strings, each representing a line of the file,
				2928	with comments stripped.
				2929	line: Number of line being processed.
				2930	include_state: An _IncludeState instance in which the headers are inserted.
				2931	function_state: A _FunctionState instance which counts function lines, etc.
				2932	class_state: A _ClassState instance which maintains information about
				2933	the current stack of nested class declarations being parsed.
				2934	error: A callable to which errors are reported, which takes 4 arguments:
				2935	filename, line number, error level, and message
				2936
				2937	"""
				2938	raw_lines = clean_lines.raw_lines
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2939	ParseNolintSuppressions(filename, raw_lines[line], line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2940	CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2941	CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
				2942	CheckStyle(filename, clean_lines, line, file_extension, error)
				2943	CheckLanguage(filename, clean_lines, line, file_extension, include_state,
				2944	error)
				2945	CheckForNonStandardConstructs(filename, clean_lines, line,
				2946	class_state, error)
				2947	CheckPosixThreading(filename, clean_lines, line, error)
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2948	CheckInvalidIncrement(filename, clean_lines, line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2949
				2950
				2951	def ProcessFileData(filename, file_extension, lines, error):
				2952	"""Performs lint checks and reports any errors to the given error function.
				2953
				2954	Args:
				2955	filename: Filename of the file that is being processed.
				2956	file_extension: The extension (dot not included) of the file.
				2957	lines: An array of strings, each representing a line of the file, with the
				2958	last element being empty if the file is termined with a newline.
				2959	error: A callable to which errors are reported, which takes 4 arguments:
				2960	"""
				2961	lines = (['// marker so line numbers and indices both start at 1'] + lines +
				2962	['// marker so line numbers end in a known way'])
				2963
				2964	include_state = _IncludeState()
				2965	function_state = _FunctionState()
				2966	class_state = _ClassState()
				2967
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2968	ResetNolintSuppressions()
				2969
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2970	CheckForCopyright(filename, lines, error)
				2971
				2972	if file_extension == 'h':
				2973	CheckForHeaderGuard(filename, lines, error)
				2974
				2975	RemoveMultiLineComments(filename, lines, error)
				2976	clean_lines = CleansedLines(lines)
				2977	for line in xrange(clean_lines.NumLines()):
				2978	ProcessLine(filename, file_extension, clean_lines, line,
				2979	include_state, function_state, class_state, error)
				2980	class_state.CheckFinished(filename, error)
				2981
				2982	CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
				2983
				2984	# We check here rather than inside ProcessLine so that we see raw
				2985	# lines rather than "cleaned" lines.
				2986	CheckForUnicodeReplacementCharacters(filename, lines, error)
				2987
				2988	CheckForNewlineAtEOF(filename, lines, error)
				2989
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  Progress and skip notices are written to sys.stderr.  A file that cannot
  be read is skipped with a message rather than raising.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source.read().split('\n')
      finally:
        # Always release the handle, even if reading fails part-way.
        source.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for linenum, text in enumerate(lines):
      if text.endswith('\r'):
        lines[linenum] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in ('cc', 'h', 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found;'
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
				3051
				3052
				3053	def PrintUsage(message):
				3054	"""Prints a brief usage string and exits, optionally with an error message.
				3055
				3056	Args:
				3057	message: The optional error message.
				3058	"""
				3059	sys.stderr.write(_USAGE)
				3060	if message:
				3061	sys.exit('\nFATAL ERROR: ' + message)
				3062	else:
				3063	sys.exit(1)
				3064
				3065
				3066	def PrintCategories():
				3067	"""Prints a list of all the error-categories used by error messages.
				3068
				3069	These are the categories used to filter messages via --filter.
				3070	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	3071	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3072	sys.exit(0)
				3073
				3074
				3075	def ParseArguments(args):
				3076	"""Parses the command line arguments.
				3077
				3078	This may set the output format and verbosity level as side-effects.
				3079
				3080	Args:
				3081	args: The command line arguments:
				3082
				3083	Returns:
				3084	The list of filenames to lint.
				3085	"""
				3086	try:
				3087	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3088	'counting=',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3089	'filter='])
				3090	except getopt.GetoptError:
				3091	PrintUsage('Invalid arguments.')
				3092
				3093	verbosity = _VerboseLevel()
				3094	output_format = _OutputFormat()
				3095	filters = ''
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3096	counting_style = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3097
				3098	for (opt, val) in opts:
				3099	if opt == '--help':
				3100	PrintUsage(None)
				3101	elif opt == '--output':
				3102	if not val in ('emacs', 'vs7'):
				3103	PrintUsage('The only allowed output formats are emacs and vs7.')
				3104	output_format = val
				3105	elif opt == '--verbose':
				3106	verbosity = int(val)
				3107	elif opt == '--filter':
				3108	filters = val
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	3109	if not filters:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3110	PrintCategories()
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3111	elif opt == '--counting':
				3112	if val not in ('total', 'toplevel', 'detailed'):
				3113	PrintUsage('Valid counting options are total, toplevel, and detailed')
				3114	counting_style = val
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3115
				3116	if not filenames:
				3117	PrintUsage('No files were specified.')
				3118
				3119	_SetOutputFormat(output_format)
				3120	_SetVerboseLevel(verbosity)
				3121	_SetFilters(filters)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3122	_SetCountingStyle(counting_style)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3123
				3124	return filenames
				3125
				3126
				3127	def main():
				3128	filenames = ParseArguments(sys.argv[1:])
				3129
				3130	# Change stderr to write with replacement characters so we don't die
				3131	# if we try to print something containing non-ASCII characters.
				3132	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				3133	codecs.getreader('utf8'),
				3134	codecs.getwriter('utf8'),
				3135	'replace')
				3136
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3137	_cpplint_state.ResetErrorCounts()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3138	for filename in filenames:
				3139	ProcessFile(filename, _cpplint_state.verbose_level)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3140	_cpplint_state.PrintErrorCounts()
				3141
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3142	sys.exit(_cpplint_state.error_count > 0)
				3143
				3144
				3145	if __name__ == '__main__':
				3146	main()