Blame - cpplint/cpplint.py - platform/external/google-styleguide

blob: bc46f09709687637a9da631105c1c564052bb314 [file] [log] [blame]

erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1	#!/usr/bin/python2.4
				2	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame^]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame^]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	8	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame^]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	18	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame^]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	30
				31	# Here are some issues that I've had people identify in my code during reviews,
				32	# that I think are possible to flag automatically in a lint tool. If these were
				33	# caught by lint, it would save time both for myself and that of my reviewers.
				34	# Most likely, some of these are beyond the scope of the current lint framework,
				35	# but I think it is valuable to retain these wish-list items even if they cannot
				36	# be immediately implemented.
				37	#
				38	# Suggestions
				39	# -----------
				40	# - Check for no 'explicit' for multi-arg ctor
				41	# - Check for boolean assign RHS in parens
				42	# - Check for ctor initializer-list colon position and spacing
				43	# - Check that if there's a ctor, there should be a dtor
				44	# - Check accessors that return non-pointer member variables are
				45	# declared const
				46	# - Check accessors that return non-const pointer member vars are
				47	# not declared const
				48	# - Check for using public includes for testing
				49	# - Check for spaces between brackets in one-line inline method
				50	# - Check for no assert()
				51	# - Check for spaces surrounding operators
				52	# - Check for 0 in pointer context (should be NULL)
				53	# - Check for 0 in char context (should be '\0')
				54	# - Check for camel-case method name conventions for methods
				55	# that are not simple inline getters and setters
				56	# - Check that base classes have virtual destructors
				57	# put " // namespace" after } that closes a namespace, with
				58	# namespace's name after 'namespace' if it is named.
				59	# - Do not indent namespace contents
				60	# - Avoid inlining non-trivial constructors in header files
				61	# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
				62	# - Check for old-school (void) cast for call-sites of functions
				63	# ignored return value
				64	# - Check gUnit usage of anonymous namespace
				65	# - Check for class declaration order (typedefs, consts, enums,
				66	# ctor(s?), dtor, friend declarations, methods, member vars)
				67	#
				68
				69	"""Does google-lint on c++ files.
				70
				71	The goal of this script is to identify places in the code that may
				72	be in non-compliance with google style. It does not attempt to fix
				73	up these problems -- the point is to educate. It does also not
				74	attempt to find all problems, or to ensure that everything it does
				75	find is legitimately a problem.
				76
				77	In particular, we can get very confused by /* and // inside strings!
				78	We do a small hack, which is to ignore //'s with "'s after them on the
				79	same line, but it is far from perfect (in either direction).
				80	"""
				81
				82	import codecs
				83	import getopt
				84	import math # for log
				85	import os
				86	import re
				87	import sre_compile
				88	import string
				89	import sys
				90	import unicodedata
				91
				92
# Command-line help text: the invocation syntax, a description of the
# confidence score and NOLINT, and the supported flags (output, verbose,
# filter) with example filter expressions.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
<file> [file] ...

The style guidelines this tries to follow are those in
http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

Every problem is given a confidence score from 1-5, with 5 meaning we are
certain of the problem, and 1 meaning it could be a legitimate construct.
This will miss some errors, and is not a substitute for a code review.

To prevent specific lines from being linted, add a '// NOLINT' comment to the
end of the line.

The files passed in will be linted; at least one file must be provided.
Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

Flags:

output=vs7
By default, the output is formatted to ease emacs parsing. Visual Studio
compatible output (vs7) may also be used. Other formats are unsupported.

verbose=#
Specify a number 0-5 to restrict errors to certain verbosity levels.

filter=-x,+y,...
Specify a comma-separated list of category-filters to apply: only
error messages whose category names pass the filters will be printed.
(Category names are printed with the message and look like
"[whitespace/indent]".) Filters are evaluated left to right.
"-FOO" and "FOO" means "do not print categories that start with FOO".
"+FOO" means "do print categories that start with FOO".

Examples: --filter=-whitespace,+whitespace/braces
--filter=whitespace,runtime/printf,+runtime/printf_format
--filter=-,+build/include_what_you_use

To see a list of all the categories used in cpplint, pass no arg:
--filter=
"""
				134
# We categorize each error message we print. Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here! cpplint_unittest.py should tell you if you forget to do this.
#
# The value is one newline-separated string with one "<group>/<name>"
# category per line; the backslash after the opening quotes keeps the
# string from starting with an empty line.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = '''\
build/class
build/deprecated
build/endif_comment
build/forward_decl
build/header_guard
build/include
build/include_order
build/include_what_you_use
build/namespaces
build/printf_format
build/storage_class
legal/copyright
readability/braces
readability/casting
readability/check
readability/constructors
readability/fn_size
readability/function
readability/multiline_comment
readability/multiline_string
readability/streams
readability/todo
readability/utf8
runtime/arrays
runtime/casting
runtime/explicit
runtime/int
runtime/init
runtime/invalid_increment
runtime/memset
runtime/printf
runtime/printf_format
runtime/references
runtime/rtti
runtime/sizeof
runtime/string
runtime/threadsafe_fn
runtime/virtual
whitespace/blank_line
whitespace/braces
whitespace/comma
whitespace/comments
whitespace/end_of_line
whitespace/ending_newline
whitespace/indent
whitespace/labels
whitespace/line_length
whitespace/newline
whitespace/operators
whitespace/parens
whitespace/semicolon
whitespace/tab
whitespace/todo
'''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	195
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags). All entries here should start with a '-' or '+', as in
# the --filter= flag.
_DEFAULT_FILTERS = []
				201
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	202	# We used to check for high-bit characters, but after much discussion we
				203	# decided those were OK, as long as they were in UTF-8 and didn't represent
				204	# hard-coded international strings, which belong in a separate i18n file.
				205
# Headers that we consider STL headers.
# Stored as a frozenset of header names exactly as they appear inside an
# #include.  Note that 'exception' is also listed in _CPP_HEADERS.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
				215
				216
# Non-STL C++ system headers.
# Stored as a frozenset of header names exactly as they appear inside an
# #include.  Note that 'exception' is also listed in _STL_HEADERS.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
				233
				234
# Assertion macros. These are defined in base/logging.h and
# testing/base/gunit.h. Note that the _M versions need to come first
# for substring matching to work.
# The list order is therefore significant: within each pair the longer
# '..._M' name precedes the plain name it contains.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]
				245
# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
# Maps each macro name in _CHECK_MACROS to a dict that maps a comparison
# operator (e.g. '==') to the name of the comparison-specific macro
# (e.g. 'CHECK_EQ').  The inner dicts start empty and are filled in by the
# two loops below.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Positive assertions: the operator maps directly onto the macro suffix.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Negated assertions (*_FALSE): the operator maps onto the suffix of the
# inverse comparison, e.g. '==' under EXPECT_FALSE becomes EXPECT_NE.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
				266
				267
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
# They classify a single #include'd header; they are distinct from the
# _XXX_SECTION values that _IncludeState uses to track its position.
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5
				275
				276
# Maps a regexp pattern string to its compiled pattern object.  Filled in
# lazily by Match() and Search() the first time each pattern is used.
_regexp_compile_cache = {}
				278
				279
				280	def Match(pattern, s):
				281	"""Matches the string with the pattern, caching the compiled regexp."""
				282	# The regexp compilation caching is inlined in both Match and Search for
				283	# performance reasons; factoring it out into a separate function turns out
				284	# to be noticeably expensive.
				285	if not pattern in _regexp_compile_cache:
				286	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				287	return _regexp_compile_cache[pattern].match(s)
				288
				289
				290	def Search(pattern, s):
				291	"""Searches the string for the pattern, caching the compiled regexp."""
				292	if not pattern in _regexp_compile_cache:
				293	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				294	return _regexp_compile_cache[pattern].search(s)
				295
				296
				297	class _IncludeState(dict):
				298	"""Tracks line numbers for includes, and the order in which includes appear.
				299
				300	As a dict, an _IncludeState object serves as a mapping between include
				301	filename and line number on which that file was included.
				302
				303	Call CheckNextIncludeOrder() once for each header in the file, passing
				304	in the type constants defined above. Calls in an illegal order will
				305	raise an _IncludeError with an appropriate error message.
				306
				307	"""
				308	# self._section will move monotonically through this set. If it ever
				309	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				310	_INITIAL_SECTION = 0
				311	_MY_H_SECTION = 1
				312	_C_SECTION = 2
				313	_CPP_SECTION = 3
				314	_OTHER_H_SECTION = 4
				315
				316	_TYPE_NAMES = {
				317	_C_SYS_HEADER: 'C system header',
				318	_CPP_SYS_HEADER: 'C++ system header',
				319	_LIKELY_MY_HEADER: 'header this file implements',
				320	_POSSIBLE_MY_HEADER: 'header this file may implement',
				321	_OTHER_HEADER: 'other header',
				322	}
				323	_SECTION_NAMES = {
				324	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				325	_MY_H_SECTION: 'a header this file implements',
				326	_C_SECTION: 'C system header',
				327	_CPP_SECTION: 'C++ system header',
				328	_OTHER_H_SECTION: 'other header',
				329	}
				330
				331	def __init__(self):
				332	dict.__init__(self)
				333	self._section = self._INITIAL_SECTION
				334
				335	def CheckNextIncludeOrder(self, header_type):
				336	"""Returns a non-empty error message if the next header is out of order.
				337
				338	This function also updates the internal state to be ready to check
				339	the next include.
				340
				341	Args:
				342	header_type: One of the _XXX_HEADER constants defined above.
				343
				344	Returns:
				345	The empty string if the header is in the right order, or an
				346	error message describing what's wrong.
				347
				348	"""
				349	error_message = ('Found %s after %s' %
				350	(self._TYPE_NAMES[header_type],
				351	self._SECTION_NAMES[self._section]))
				352
				353	if header_type == _C_SYS_HEADER:
				354	if self._section <= self._C_SECTION:
				355	self._section = self._C_SECTION
				356	else:
				357	return error_message
				358	elif header_type == _CPP_SYS_HEADER:
				359	if self._section <= self._CPP_SECTION:
				360	self._section = self._CPP_SECTION
				361	else:
				362	return error_message
				363	elif header_type == _LIKELY_MY_HEADER:
				364	if self._section <= self._MY_H_SECTION:
				365	self._section = self._MY_H_SECTION
				366	else:
				367	self._section = self._OTHER_H_SECTION
				368	elif header_type == _POSSIBLE_MY_HEADER:
				369	if self._section <= self._MY_H_SECTION:
				370	self._section = self._MY_H_SECTION
				371	else:
				372	# This will always be the fallback because we're not sure
				373	# enough that the header is associated with this file.
				374	self._section = self._OTHER_H_SECTION
				375	else:
				376	assert header_type == _OTHER_HEADER
				377	self._section = self._OTHER_H_SECTION
				378
				379	return ''
				380
				381
				382	class _CppLintState(object):
				383	"""Maintains module-wide state.."""
				384
				385	def __init__(self):
				386	self.verbose_level = 1 # global setting.
				387	self.error_count = 0 # global count of reported errors
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	388	# filters to apply when emitting error messages
				389	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	390
				391	# output format:
				392	# "emacs" - format that emacs can parse (default)
				393	# "vs7" - format that Microsoft Visual Studio 7 can parse
				394	self.output_format = 'emacs'
				395
				396	def SetOutputFormat(self, output_format):
				397	"""Sets the output format for errors."""
				398	self.output_format = output_format
				399
				400	def SetVerboseLevel(self, level):
				401	"""Sets the module's verbosity, and returns the previous setting."""
				402	last_verbose_level = self.verbose_level
				403	self.verbose_level = level
				404	return last_verbose_level
				405
				406	def SetFilters(self, filters):
				407	"""Sets the error-message filters.
				408
				409	These filters are applied when deciding whether to emit a given
				410	error message.
				411
				412	Args:
				413	filters: A string of comma-separated filters (eg "+whitespace/indent").
				414	Each filter should start with + or -; else we die.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	415
				416	Raises:
				417	ValueError: The comma-separated filters did not all start with '+' or '-'.
				418	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	419	"""
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	420	# Default filters always have less priority than the flag ones.
				421	self.filters = _DEFAULT_FILTERS[:]
				422	for filt in filters.split(','):
				423	clean_filt = filt.strip()
				424	if clean_filt:
				425	self.filters.append(clean_filt)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	426	for filt in self.filters:
				427	if not (filt.startswith('+') or filt.startswith('-')):
				428	raise ValueError('Every filter in --filters must start with + or -'
				429	' (%s does not)' % filt)
				430
				431	def ResetErrorCount(self):
				432	"""Sets the module's error statistic back to zero."""
				433	self.error_count = 0
				434
				435	def IncrementErrorCount(self):
				436	"""Bumps the module's error statistic."""
				437	self.error_count += 1
				438
				439
# The single module-level _CppLintState instance holding the verbosity,
# filters, output format and error count.
_cpplint_state = _CppLintState()
				441
				442
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the module-level _cpplint_state.
  """
  return _cpplint_state.output_format
				446
				447
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format value; passed unchanged to
      _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
				451
				452
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the module-level _cpplint_state.
  """
  return _cpplint_state.verbose_level
				456
				457
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
				461
				462
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object itself is returned (not a copy), so mutating it changes
  the filters held by _cpplint_state.
  """
  return _cpplint_state.filters
				466
				467
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: Propagated from _CppLintState.SetFilters() when a filter
                does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
				479
				480
				481	class _FunctionState(object):
				482	"""Tracks current function name and the number of lines in its body."""
				483
				484	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				485	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				486
				487	def __init__(self):
				488	self.in_a_function = False
				489	self.lines_in_function = 0
				490	self.current_function = ''
				491
				492	def Begin(self, function_name):
				493	"""Start analyzing function body.
				494
				495	Args:
				496	function_name: The name of the function being tracked.
				497	"""
				498	self.in_a_function = True
				499	self.lines_in_function = 0
				500	self.current_function = function_name
				501
				502	def Count(self):
				503	"""Count line in current function body."""
				504	if self.in_a_function:
				505	self.lines_in_function += 1
				506
				507	def Check(self, error, filename, linenum):
				508	"""Report if too many lines in function body.
				509
				510	Args:
				511	error: The function to call with any errors found.
				512	filename: The name of the current file.
				513	linenum: The number of the line to check.
				514	"""
				515	if Match(r'T(EST\|est)', self.current_function):
				516	base_trigger = self._TEST_TRIGGER
				517	else:
				518	base_trigger = self._NORMAL_TRIGGER
				519	trigger = base_trigger * 2**_VerboseLevel()
				520
				521	if self.lines_in_function > trigger:
				522	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				523	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				524	if error_level > 5:
				525	error_level = 5
				526	error(filename, linenum, 'readability/fn_size', error_level,
				527	'Small and focused functions are preferred:'
				528	' %s has %d non-comment lines'
				529	' (error triggered by exceeding %d lines).' % (
				530	self.current_function, self.lines_in_function, trigger))
				531
				532	def End(self):
				533	"""Stop analizing function body."""
				534	self.in_a_function = False
				535
				536
				537	class _IncludeError(Exception):
				538	"""Indicates a problem with the include order in a file."""
				539	pass
				540
				541
				542	class FileInfo:
				543	"""Provides utility functions for filenames.
				544
				545	FileInfo provides easy access to the components of a file's path
				546	relative to the project root.
				547	"""
				548
				549	def __init__(self, filename):
				550	self._filename = filename
				551
				552	def FullName(self):
				553	"""Make Windows paths like Unix."""
				554	return os.path.abspath(self._filename).replace('\\', '/')
				555
				556	def RepositoryName(self):
				557	"""FullName after removing the local path to the repository.
				558
				559	If we have a real absolute path name here we can try to do something smart:
				560	detecting the root of the checkout and truncating /path/to/checkout from
				561	the name so that we get header guards that don't include things like
				562	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				563	people on different computers who have checked the source out to different
				564	locations won't see bogus errors.
				565	"""
				566	fullname = self.FullName()
				567
				568	if os.path.exists(fullname):
				569	project_dir = os.path.dirname(fullname)
				570
				571	if os.path.exists(os.path.join(project_dir, ".svn")):
				572	# If there's a .svn file in the current directory, we recursively look
				573	# up the directory tree for the top of the SVN checkout
				574	root_dir = project_dir
				575	one_up_dir = os.path.dirname(root_dir)
				576	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				577	root_dir = os.path.dirname(root_dir)
				578	one_up_dir = os.path.dirname(one_up_dir)
				579
				580	prefix = os.path.commonprefix([root_dir, project_dir])
				581	return fullname[len(prefix) + 1:]
				582
				583	# Not SVN? Try to find a git top level directory by searching up from the
				584	# current path.
				585	root_dir = os.path.dirname(fullname)
				586	while (root_dir != os.path.dirname(root_dir) and
				587	not os.path.exists(os.path.join(root_dir, ".git"))):
				588	root_dir = os.path.dirname(root_dir)
				589	if os.path.exists(os.path.join(root_dir, ".git")):
				590	prefix = os.path.commonprefix([root_dir, project_dir])
				591	return fullname[len(prefix) + 1:]
				592
				593	# Don't know what to do; header guard warnings may be wrong...
				594	return fullname
				595
				596	def Split(self):
				597	"""Splits the file into the directory, basename, and extension.
				598
				599	For 'chrome/browser/browser.cc', Split() would
				600	return ('chrome/browser', 'browser', '.cc')
				601
				602	Returns:
				603	A tuple of (directory, basename, extension).
				604	"""
				605
				606	googlename = self.RepositoryName()
				607	project, rest = os.path.split(googlename)
				608	return (project,) + os.path.splitext(rest)
				609
				610	def BaseName(self):
				611	"""File base name - text after the final slash, before the final period."""
				612	return self.Split()[1]
				613
				614	def Extension(self):
				615	"""File extension - text following the final period."""
				616	return self.Split()[2]
				617
				618	def NoExtension(self):
				619	"""File has no source file extension."""
				620	return '/'.join(self.Split()[0:2])
				621
				622	def IsSource(self):
				623	"""File has a source file extension."""
				624	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				625
				626
				627	def _ShouldPrintError(category, confidence):
				628	"""Returns true iff confidence >= verbose, and category passes filter."""
				629	# There are two ways we might decide not to print an error message:
				630	# the verbosity level isn't high enough, or the filters filter it out.
				631	if confidence < _cpplint_state.verbose_level:
				632	return False
				633
				634	is_filtered = False
				635	for one_filter in _Filters():
				636	if one_filter.startswith('-'):
				637	if category.startswith(one_filter[1:]):
				638	is_filtered = True
				639	elif one_filter.startswith('+'):
				640	if category.startswith(one_filter[1:]):
				641	is_filtered = False
				642	else:
				643	assert False # should have been checked for in SetFilter.
				644	if is_filtered:
				645	return False
				646
				647	return True
				648
				649
				650	def Error(filename, linenum, category, confidence, message):
				651	"""Logs the fact we've found a lint error.
				652
				653	We log where the error was found, and also our confidence in the error,
				654	that is, how certain we are this is a legitimate style regression, and
				655	not a misidentification or a use that's sometimes justified.
				656
				657	Args:
				658	filename: The name of the file containing the error.
				659	linenum: The number of the line containing the error.
				660	category: A string used to describe the "category" this bug
				661	falls under: "whitespace", say, or "runtime". Categories
				662	may have a hierarchy separated by slashes: "whitespace/indent".
				663	confidence: A number from 1-5 representing a confidence score for
				664	the error, with 5 meaning that we are certain of the problem,
				665	and 1 meaning that it could be a legitimate construct.
				666	message: The error message.
				667	"""
				668	# There are two ways we might decide not to print an error message:
				669	# the verbosity level isn't high enough, or the filters filter it out.
				670	if _ShouldPrintError(category, confidence):
				671	_cpplint_state.IncrementErrorCount()
				672	if _cpplint_state.output_format == 'vs7':
				673	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				674	filename, linenum, message, category, confidence))
				675	else:
				676	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				677	filename, linenum, message, category, confidence))
				678
				679
				680	# Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard.
				681	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				682	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				683	# Matches strings. Escape codes should already be removed by ESCAPES.
				684	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				685	# Matches characters. Escape codes should already be removed by ESCAPES.
				686	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				687	# Matches multi-line C++ comments.
				688	# This RE is a little bit more complicated than one might expect, because we
				689	# have to take care of space removals tools so we can handle comments inside
				690	# statements better.
				691	# The current rule is: We only clear spaces from both sides when we're at the
				692	# end of the line. Otherwise, we try to remove spaces from the right side,
				693	# if this doesn't work we try on left side but only if there's a non-character
				694	# on the right.
				695	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				696	r"""(\s/\.\/\s*$\|
				697	/\.\*/\s+\|
				698	\s+/\.\*/(?=\W)\|
				699	/\.\*/)""", re.VERBOSE)
				700
				701
				702	def IsCppString(line):
				703	"""Does line terminate so, that the next symbol is in string constant.
				704
				705	This function does not consider single-line nor multi-line comments.
				706
				707	Args:
				708	line: is a partial line of code starting from the 0..n.
				709
				710	Returns:
				711	True, if next character appended to 'line' is inside a
				712	string constant.
				713	"""
				714
				715	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				716	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				717
				718
				719	def FindNextMultiLineCommentStart(lines, lineix):
				720	"""Find the beginning marker for a multiline comment."""
				721	while lineix < len(lines):
				722	if lines[lineix].strip().startswith('/*'):
				723	# Only return this marker if the comment goes beyond this line
				724	if lines[lineix].strip().find('*/', 2) < 0:
				725	return lineix
				726	lineix += 1
				727	return len(lines)
				728
				729
				730	def FindNextMultiLineCommentEnd(lines, lineix):
				731	"""We are inside a comment, find the end marker."""
				732	while lineix < len(lines):
				733	if lines[lineix].strip().endswith('*/'):
				734	return lineix
				735	lineix += 1
				736	return len(lines)
				737
				738
				739	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				740	"""Clears a range of lines for multi-line comments."""
				741	# Having // dummy comments makes the lines non-empty, so we will not get
				742	# unnecessary blank line warnings later in the code.
				743	for i in range(begin, end):
				744	lines[i] = '// dummy'
				745
				746
				747	def RemoveMultiLineComments(filename, lines, error):
				748	"""Removes multiline (c-style) comments from lines."""
				749	lineix = 0
				750	while lineix < len(lines):
				751	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				752	if lineix_begin >= len(lines):
				753	return
				754	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				755	if lineix_end >= len(lines):
				756	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				757	'Could not find end of multi-line comment')
				758	return
				759	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				760	lineix = lineix_end + 1
				761
				762
				763	def CleanseComments(line):
				764	"""Removes //-comments and single-line C-style /* */ comments.
				765
				766	Args:
				767	line: A line of C++ source.
				768
				769	Returns:
				770	The line with single-line comments removed.
				771	"""
				772	commentpos = line.find('//')
				773	if commentpos != -1 and not IsCppString(line[:commentpos]):
				774	line = line[:commentpos]
				775	# get rid of /* ... */
				776	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				777
				778
class CleansedLines(object):
  """Keeps three parallel views of a file's lines.

  1) elided: the lines with strings collapsed and comments removed,
  2) lines: the lines with comments removed, and
  3) raw_lines: the lines exactly as they were passed in.
  All three members are lists of the same length, also kept in num_lines.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped, so that comment
      # markers inside string literals are not taken for real comments.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings. Lines matching _RE_PATTERN_INCLUDE
      are returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
				822
				823
				824	def CloseExpression(clean_lines, linenum, pos):
				825	"""If input points to ( or { or [, finds the position that closes it.
				826
				827	If lines[linenum][pos] points to a '(' or '{' or '[', finds the the
				828	linenum/pos that correspond to the closing of the expression.
				829
				830	Args:
				831	clean_lines: A CleansedLines instance containing the file.
				832	linenum: The number of the line to check.
				833	pos: A position on the line.
				834
				835	Returns:
				836	A tuple (line, linenum, pos) pointer past the closing brace, or
				837	(line, len(lines), -1) if we never find a close. Note we ignore
				838	strings and comments when matching; and the line we return is the
				839	'cleansed' line at linenum.
				840	"""
				841
				842	line = clean_lines.elided[linenum]
				843	startchar = line[pos]
				844	if startchar not in '({[':
				845	return (line, clean_lines.NumLines(), -1)
				846	if startchar == '(': endchar = ')'
				847	if startchar == '[': endchar = ']'
				848	if startchar == '{': endchar = '}'
				849
				850	num_open = line.count(startchar) - line.count(endchar)
				851	while linenum < clean_lines.NumLines() and num_open > 0:
				852	linenum += 1
				853	line = clean_lines.elided[linenum]
				854	num_open += line.count(startchar) - line.count(endchar)
				855	# OK, now find the endchar that actually got us back to even
				856	endpos = len(line)
				857	while num_open >= 0:
				858	endpos = line.rfind(')', 0, endpos)
				859	num_open -= 1 # chopped off another )
				860	return (line, linenum, endpos + 1)
				861
				862
				863	def CheckForCopyright(filename, lines, error):
				864	"""Logs an error if no Copyright message appears at the top of the file."""
				865
				866	# We'll say it should occur by line 10. Don't forget there's a
				867	# dummy line at the front.
				868	for line in xrange(1, min(len(lines), 11)):
				869	if re.search(r'Copyright', lines[line], re.I): break
				870	else: # means no copyright line was found
				871	error(filename, 0, 'legal/copyright', 5,
				872	'No copyright message found. '
				873	'You should have a line: "Copyright [year] <Copyright Owner>"')
				874
				875
				876	def GetHeaderGuardCPPVariable(filename):
				877	"""Returns the CPP variable that should be used as a header guard.
				878
				879	Args:
				880	filename: The name of a C++ header file.
				881
				882	Returns:
				883	The CPP variable that should be used as a header guard in the
				884	named file.
				885
				886	"""
				887
				888	fileinfo = FileInfo(filename)
				889	return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
				890
				891
				892	def CheckForHeaderGuard(filename, lines, error):
				893	"""Checks that the file contains a header guard.
				894
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	895	Logs an error if no #ifndef header guard is present. For other
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	896	headers, checks that the full pathname is used.
				897
				898	Args:
				899	filename: The name of the C++ header file.
				900	lines: An array of strings, each representing a line of the file.
				901	error: The function to call with any errors found.
				902	"""
				903
				904	cppvar = GetHeaderGuardCPPVariable(filename)
				905
				906	ifndef = None
				907	ifndef_linenum = 0
				908	define = None
				909	endif = None
				910	endif_linenum = 0
				911	for linenum, line in enumerate(lines):
				912	linesplit = line.split()
				913	if len(linesplit) >= 2:
				914	# find the first occurrence of #ifndef and #define, save arg
				915	if not ifndef and linesplit[0] == '#ifndef':
				916	# set ifndef to the header guard presented on the #ifndef line.
				917	ifndef = linesplit[1]
				918	ifndef_linenum = linenum
				919	if not define and linesplit[0] == '#define':
				920	define = linesplit[1]
				921	# find the last occurrence of #endif, save entire line
				922	if line.startswith('#endif'):
				923	endif = line
				924	endif_linenum = linenum
				925
				926	if not ifndef or not define or ifndef != define:
				927	error(filename, 0, 'build/header_guard', 5,
				928	'No #ifndef header guard found, suggested CPP variable is: %s' %
				929	cppvar)
				930	return
				931
				932	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				933	# for backward compatibility.
				934	if ifndef != cppvar:
				935	error_level = 0
				936	if ifndef != cppvar + '_':
				937	error_level = 5
				938
				939	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				940	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				941
				942	if endif != ('#endif // %s' % cppvar):
				943	error_level = 0
				944	if endif != ('#endif // %s' % (cppvar + '_')):
				945	error_level = 5
				946
				947	error(filename, endif_linenum, 'build/header_guard', error_level,
				948	'#endif line should be "#endif // %s"' % cppvar)
				949
				950
				951	def CheckForUnicodeReplacementCharacters(filename, lines, error):
				952	"""Logs an error for each line containing Unicode replacement characters.
				953
				954	These indicate that either the file contained invalid UTF-8 (likely)
				955	or Unicode replacement characters (which it shouldn't). Note that
				956	it's possible for this to throw off line numbering if the invalid
				957	UTF-8 occurred adjacent to a newline.
				958
				959	Args:
				960	filename: The name of the current file.
				961	lines: An array of strings, each representing a line of the file.
				962	error: The function to call with any errors found.
				963	"""
				964	for linenum, line in enumerate(lines):
				965	if u'\ufffd' in line:
				966	error(filename, linenum, 'readability/utf8', 5,
				967	'Line contains invalid UTF-8 (or Unicode replacement character).')
				968
				969
				970	def CheckForNewlineAtEOF(filename, lines, error):
				971	"""Logs an error if there is no newline char at the end of the file.
				972
				973	Args:
				974	filename: The name of the current file.
				975	lines: An array of strings, each representing a line of the file.
				976	error: The function to call with any errors found.
				977	"""
				978
				979	# The array lines() was created by adding two newlines to the
				980	# original file (go figure), then splitting on \n.
				981	# To verify that the file ends in \n, we just have to make sure the
				982	# last-but-two element of lines() exists and is empty.
				983	if len(lines) < 3 or lines[-2]:
				984	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				985	'Could not find a newline character at the end of the file.')
				986
				987
				988	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				989	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				990
				991	/* ... */ comments are legit inside macros, for one line.
				992	Otherwise, we prefer // comments, so it's ok to warn about the
				993	other. Likewise, it's ok for strings to extend across multiple
				994	lines, as long as a line continuation character (backslash)
				995	terminates each line. Although not currently prohibited by the C++
				996	style guide, it's ugly and unnecessary. We don't do well with either
				997	in this lint program, so we warn about both.
				998
				999	Args:
				1000	filename: The name of the current file.
				1001	clean_lines: A CleansedLines instance containing the file.
				1002	linenum: The number of the line to check.
				1003	error: The function to call with any errors found.
				1004	"""
				1005	line = clean_lines.elided[linenum]
				1006
				1007	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1008	# second (escaped) slash may trigger later \" detection erroneously.
				1009	line = line.replace('\\\\', '')
				1010
				1011	if line.count('/') > line.count('/'):
				1012	error(filename, linenum, 'readability/multiline_comment', 5,
				1013	'Complex multi-line /.../-style comment found. '
				1014	'Lint may give bogus warnings. '
				1015	'Consider replacing these with //-style comments, '
				1016	'with #if 0...#endif, '
				1017	'or with more clearly structured multi-line comments.')
				1018
				1019	if (line.count('"') - line.count('\\"')) % 2:
				1020	error(filename, linenum, 'readability/multiline_string', 5,
				1021	'Multi-line string ("...") found. This lint script doesn\'t '
				1022	'do well with such strings, and may give bogus warnings. They\'re '
				1023	'ugly and unnecessary, and you should use concatenation instead".')
				1024
				1025
				1026	threading_list = (
				1027	('asctime(', 'asctime_r('),
				1028	('ctime(', 'ctime_r('),
				1029	('getgrgid(', 'getgrgid_r('),
				1030	('getgrnam(', 'getgrnam_r('),
				1031	('getlogin(', 'getlogin_r('),
				1032	('getpwnam(', 'getpwnam_r('),
				1033	('getpwuid(', 'getpwuid_r('),
				1034	('gmtime(', 'gmtime_r('),
				1035	('localtime(', 'localtime_r('),
				1036	('rand(', 'rand_r('),
				1037	('readdir(', 'readdir_r('),
				1038	('strtok(', 'strtok_r('),
				1039	('ttyname(', 'ttyname_r('),
				1040	)
				1041
				1042
				1043	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1044	"""Checks for calls to thread-unsafe functions.
				1045
				1046	Much code has been originally written without consideration of
				1047	multi-threading. Also, engineers are relying on their old experience;
				1048	they have learned posix before threading extensions were added. These
				1049	tests guide the engineers to use thread-safe functions (when using
				1050	posix directly).
				1051
				1052	Args:
				1053	filename: The name of the current file.
				1054	clean_lines: A CleansedLines instance containing the file.
				1055	linenum: The number of the line to check.
				1056	error: The function to call with any errors found.
				1057	"""
				1058	line = clean_lines.elided[linenum]
				1059	for single_thread_function, multithread_safe_function in threading_list:
				1060	ix = line.find(single_thread_function)
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1061	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1062	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1063	line[ix - 1] not in ('_', '.', '>'))):
				1064	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1065	'Consider using ' + multithread_safe_function +
				1066	'...) instead of ' + single_thread_function +
				1067	'...) for improved thread safety.')
				1068
				1069
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value.
_RE_PATTERN_IVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  The same applies to the decrement form *count--.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_IVALID_INCREMENT.match(line):
    error(filename, linenum, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
				1096
				1097
class _ClassInfo(object):
  """Record of what has been learned so far about one class declaration."""

  def __init__(self, name, linenum):
    # Identity of the class: its name and the line it was declared on.
    self.name = name
    self.linenum = linenum
    # Parse progress; both are updated as the class body is scanned.
    self.seen_open_brace = False
    self.brace_depth = 0
    # Facts gathered for the virtual-destructor check.
    self.is_derived = False
    self.virtual_method_linenumber = None
    self.has_virtual_destructor = False
				1109
				1110
				1111	class _ClassState(object):
				1112	"""Holds the current state of the parse relating to class declarations.
				1113
				1114	It maintains a stack of _ClassInfos representing the parser's guess
				1115	as to the current nesting of class declarations. The innermost class
				1116	is at the top (back) of the stack. Typically, the stack will either
				1117	be empty or have exactly one entry.
				1118	"""
				1119
				1120	def __init__(self):
				1121	self.classinfo_stack = []
				1122
				1123	def CheckFinished(self, filename, error):
				1124	"""Checks that all classes have been completely parsed.
				1125
				1126	Call this when all lines in a file have been processed.
				1127	Args:
				1128	filename: The name of the current file.
				1129	error: The function to call with any errors found.
				1130	"""
				1131	if self.classinfo_stack:
				1132	# Note: This test can result in false positives if #ifdef constructs
				1133	# get in the way of brace matching. See the testBuildClass test in
				1134	# cpplint_unittest.py for an example of this.
				1135	error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
				1136	'Failed to find complete declaration of class %s' %
				1137	self.classinfo_stack[0].name)
				1138
				1139
				1140	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
				1141	class_state, error):
				1142	"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
				1143
				1144	Complain about several constructs which gcc-2 accepts, but which are
				1145	not standard C++. Warning about these in lint is one way to ease the
				1146	transition to new compilers.
				1147	- put storage class first (e.g. "static const" instead of "const static").
				1148	- "%lld" instead of %qd" in printf-type functions.
				1149	- "%1$d" is non-standard in printf-type functions.
				1150	- "\%" is an undefined character escape sequence.
				1151	- text after #endif is not allowed.
				1152	- invalid inner-style forward declaration.
				1153	- >? and <? operators, and their >?= and <?= cousins.
				1154	- classes with virtual methods need virtual destructors (compiler warning
				1155	available, but not turned on yet.)
				1156
				1157	Additionally, check for constructor/destructor style violations as it
				1158	is very convenient to do so while checking for gcc-2 compliance.
				1159
				1160	Args:
				1161	filename: The name of the current file.
				1162	clean_lines: A CleansedLines instance containing the file.
				1163	linenum: The number of the line to check.
				1164	class_state: A _ClassState instance which maintains information about
				1165	the current stack of nested class declarations being parsed.
				1166	error: A callable to which errors are reported, which takes 4 arguments:
				1167	filename, line number, error level, and message
				1168	"""
				1169
				1170	# Remove comments from the line, but leave in strings for now.
				1171	line = clean_lines.lines[linenum]
				1172
				1173	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				1174	error(filename, linenum, 'runtime/printf_format', 3,
				1175	'%q in format strings is deprecated. Use %ll instead.')
				1176
				1177	if Search(r'printf\s\(.".*%\d+\$', line):
				1178	error(filename, linenum, 'runtime/printf_format', 2,
				1179	'%N$ formats are unconventional. Try rewriting to avoid them.')
				1180
				1181	# Remove escaped backslashes before looking for undefined escapes.
				1182	line = line.replace('\\\\', '')
				1183
				1184	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				1185	error(filename, linenum, 'build/printf_format', 3,
				1186	'%, [, (, and { are undefined character escapes. Unescape them.')
				1187
				1188	# For the rest, work with both comments and strings removed.
				1189	line = clean_lines.elided[linenum]
				1190
				1191	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				1192	r'\|float\|double\|signed\|unsigned'
				1193	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
				1194	r'\s+(auto\|register\|static\|extern\|typedef)\b',
				1195	line):
				1196	error(filename, linenum, 'build/storage_class', 5,
				1197	'Storage class (static, extern, typedef, etc) should be first.')
				1198
				1199	if Match(r'\s#\sendif\s*[^/\s]+', line):
				1200	error(filename, linenum, 'build/endif_comment', 5,
				1201	'Uncommented text after #endif is non-standard. Use a comment.')
				1202
				1203	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				1204	error(filename, linenum, 'build/forward_decl', 5,
				1205	'Inner-style forward declarations are invalid. Remove this line.')
				1206
				1207	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				1208	line):
				1209	error(filename, linenum, 'build/deprecated', 3,
				1210	'>? and <? (max and min) operators are non-standard and deprecated.')
				1211
				1212	# Track class entry and exit, and attempt to find cases within the
				1213	# class declaration that don't meet the C++ style
				1214	# guidelines. Tracking is very dependent on the code matching Google
				1215	# style guidelines, but it seems to perform well enough in testing
				1216	# to be a worthwhile addition to the checks.
				1217	classinfo_stack = class_state.classinfo_stack
				1218	# Look for a class declaration
				1219	class_decl_match = Match(
				1220	r'\s(template\s<[\w\s<>,:]>\s)?(class\|struct)\s+(\w+(::\w+)*)', line)
				1221	if class_decl_match:
				1222	classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))
				1223
				1224	# Everything else in this function uses the top of the stack if it's
				1225	# not empty.
				1226	if not classinfo_stack:
				1227	return
				1228
				1229	classinfo = classinfo_stack[-1]
				1230
				1231	# If the opening brace hasn't been seen look for it and also
				1232	# parent class declarations.
				1233	if not classinfo.seen_open_brace:
				1234	# If the line has a ';' in it, assume it's a forward declaration or
				1235	# a single-line class declaration, which we won't process.
				1236	if line.find(';') != -1:
				1237	classinfo_stack.pop()
				1238	return
				1239	classinfo.seen_open_brace = (line.find('{') != -1)
				1240	# Look for a bare ':'
				1241	if Search('(^\|[^:]):($\|[^:])', line):
				1242	classinfo.is_derived = True
				1243	if not classinfo.seen_open_brace:
				1244	return # Everything else in this function is for after open brace
				1245
				1246	# The class may have been declared with namespace or classname qualifiers.
				1247	# The constructor and destructor will not have those qualifiers.
				1248	base_classname = classinfo.name.split('::')[-1]
				1249
				1250	# Look for single-argument constructors that aren't marked explicit.
				1251	# Technically a valid construct, but against style.
				1252	args = Match(r'(?<!explicit)\s+%s\s*$([^,()]+)$'
				1253	% re.escape(base_classname),
				1254	line)
				1255	if (args and
				1256	args.group(1) != 'void' and
				1257	not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
				1258	args.group(1).strip())):
				1259	error(filename, linenum, 'runtime/explicit', 5,
				1260	'Single-argument constructors should be marked explicit.')
				1261
				1262	# Look for methods declared virtual.
				1263	if Search(r'\bvirtual\b', line):
				1264	classinfo.virtual_method_linenumber = linenum
				1265	# Only look for a destructor declaration on the same line. It would
				1266	# be extremely unlikely for the destructor declaration to occupy
				1267	# more than one line.
				1268	if Search(r'~%s\s*\(' % base_classname, line):
				1269	classinfo.has_virtual_destructor = True
				1270
				1271	# Look for class end.
				1272	brace_depth = classinfo.brace_depth
				1273	brace_depth = brace_depth + line.count('{') - line.count('}')
				1274	if brace_depth <= 0:
				1275	classinfo = classinfo_stack.pop()
				1276	# Try to detect missing virtual destructor declarations.
				1277	# For now, only warn if a non-derived class with virtual methods lacks
				1278	# a virtual destructor. This is to make it less likely that people will
				1279	# declare derived virtual destructors without declaring the base
				1280	# destructor virtual.
				1281	if ((classinfo.virtual_method_linenumber is not None) and
				1282	(not classinfo.has_virtual_destructor) and
				1283	(not classinfo.is_derived)): # Only warn for base classes
				1284	error(filename, classinfo.linenum, 'runtime/virtual', 4,
				1285	'The class %s probably needs a virtual destructor due to '
				1286	'having virtual method(s), one declared at line %d.'
				1287	% (classinfo.name, classinfo.virtual_method_linenumber))
				1288	else:
				1289	classinfo.brace_depth = brace_depth
				1290
				1291
				1292	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				1293	"""Checks for the correctness of various spacing around function calls.
				1294
				1295	Args:
				1296	filename: The name of the current file.
				1297	line: The text of the line to check.
				1298	linenum: The number of the line to check.
				1299	error: The function to call with any errors found.
				1300	"""
				1301
				1302	# Since function calls often occur inside if/for/while/switch
				1303	# expressions - which have their own, more liberal conventions - we
				1304	# first see if we should be looking inside such an expression for a
				1305	# function call, to which we can apply more strict standards.
				1306	fncall = line # if there's no control flow construct, look at whole line
				1307	for pattern in (r'\bif\s$(.)$\s*{',
				1308	r'\bfor\s$(.)$\s*{',
				1309	r'\bwhile\s$(.)$\s*[{;]',
				1310	r'\bswitch\s$(.)$\s*{'):
				1311	match = Search(pattern, line)
				1312	if match:
				1313	fncall = match.group(1) # look inside the parens for function calls
				1314	break
				1315
				1316	# Except in if/for/while/switch, there should never be space
				1317	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				1318	# for nested parens ( (a+b) + c ). Likewise, there should never be
				1319	# a space before a ( when it's a function argument. I assume it's a
				1320	# function argument when the char before the whitespace is legal in
				1321	# a function name (alnum + _) and we're not starting a macro. Also ignore
				1322	# pointers and references to arrays and functions coz they're too tricky:
				1323	# we use a very simple way to recognize these:
				1324	# " (something)(maybe-something)" or
				1325	# " (something)(maybe-something," or
				1326	# " (something)[something]"
				1327	# Note that we assume the contents of [] to be short enough that
				1328	# they'll never need to wrap.
				1329	if ( # Ignore control structures.
				1330	not Search(r'\b(if\|for\|while\|switch\|return\|delete)\b', fncall) and
				1331	# Ignore pointers/references to functions.
				1332	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				1333	# Ignore pointers/references to arrays.
				1334	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1335	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1336	error(filename, linenum, 'whitespace/parens', 4,
				1337	'Extra space after ( in function call')
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1338	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1339	error(filename, linenum, 'whitespace/parens', 2,
				1340	'Extra space after (')
				1341	if (Search(r'\w\s+\(', fncall) and
				1342	not Search(r'#\s*define\|typedef', fncall)):
				1343	error(filename, linenum, 'whitespace/parens', 4,
				1344	'Extra space before ( in function call')
				1345	# If the ) is followed only by a newline or a { + newline, assume it's
				1346	# part of a control statement (if/while/etc), and don't complain
				1347	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
				1348	error(filename, linenum, 'whitespace/parens', 2,
				1349	'Extra space before )')
				1350
				1351
				1352	def IsBlankLine(line):
				1353	"""Returns true if the given line is blank.
				1354
				1355	We consider a line to be blank if the line is empty or consists of
				1356	only white spaces.
				1357
				1358	Args:
				1359	line: A line of a string.
				1360
				1361	Returns:
				1362	True, if the given line is blank.
				1363	"""
				1364	return not line or line.isspace()
				1365
				1366
				1367	def CheckForFunctionLengths(filename, clean_lines, linenum,
				1368	function_state, error):
				1369	"""Reports for long function bodies.
				1370
				1371	For an overview why this is done, see:
				1372	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				1373
				1374	Uses a simplistic algorithm assuming other style guidelines
				1375	(especially spacing) are followed.
				1376	Only checks unindented functions, so class members are unchecked.
				1377	Trivial bodies are unchecked, so constructors with huge initializer lists
				1378	may be missed.
				1379	Blank/comment lines are not counted so as to avoid encouraging the removal
				1380	of vertical space and commments just to get through a lint check.
				1381	NOLINT on the last line of a function disables this check.
				1382
				1383	Args:
				1384	filename: The name of the current file.
				1385	clean_lines: A CleansedLines instance containing the file.
				1386	linenum: The number of the line to check.
				1387	function_state: Current function name and lines in body so far.
				1388	error: The function to call with any errors found.
				1389	"""
				1390	lines = clean_lines.lines
				1391	line = lines[linenum]
				1392	raw = clean_lines.raw_lines
				1393	raw_line = raw[linenum]
				1394	joined_line = ''
				1395
				1396	starting_func = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1397	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1398	match_result = Match(regexp, line)
				1399	if match_result:
				1400	# If the name is all caps and underscores, figure it's a macro and
				1401	# ignore it, unless it's TEST or TEST_F.
				1402	function_name = match_result.group(1).split()[-1]
				1403	if function_name == 'TEST' or function_name == 'TEST_F' or (
				1404	not Match(r'[A-Z_]+$', function_name)):
				1405	starting_func = True
				1406
				1407	if starting_func:
				1408	body_found = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1409	for start_linenum in xrange(linenum, clean_lines.NumLines()):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1410	start_line = lines[start_linenum]
				1411	joined_line += ' ' + start_line.lstrip()
				1412	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				1413	body_found = True
				1414	break # ... ignore
				1415	elif Search(r'{', start_line):
				1416	body_found = True
				1417	function = Search(r'((\w\|:)*)\(', line).group(1)
				1418	if Match(r'TEST', function): # Handle TEST... macros
				1419	parameter_regexp = Search(r'($.*$)', joined_line)
				1420	if parameter_regexp: # Ignore bad syntax
				1421	function += parameter_regexp.group(1)
				1422	else:
				1423	function += '()'
				1424	function_state.Begin(function)
				1425	break
				1426	if not body_found:
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1427	# No body for the function (or evidence of a non-function) was found.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1428	error(filename, linenum, 'readability/fn_size', 5,
				1429	'Lint failed to find start of function body.')
				1430	elif Match(r'^\}\s*$', line): # function end
				1431	if not Search(r'\bNOLINT\b', raw_line):
				1432	function_state.Check(error, filename, linenum)
				1433	function_state.End()
				1434	elif not Match(r'^\s*$', line):
				1435	function_state.Count() # Count non-blank/non-comment lines.
				1436
				1437
				1438	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				1439
				1440
				1441	def CheckComment(comment, filename, linenum, error):
				1442	"""Checks for common mistakes in TODO comments.
				1443
				1444	Args:
				1445	comment: The text of the comment from the line in question.
				1446	filename: The name of the current file.
				1447	linenum: The number of the line to check.
				1448	error: The function to call with any errors found.
				1449	"""
				1450	match = _RE_PATTERN_TODO.match(comment)
				1451	if match:
				1452	# One whitespace is correct; zero whitespace is handled elsewhere.
				1453	leading_whitespace = match.group(1)
				1454	if len(leading_whitespace) > 1:
				1455	error(filename, linenum, 'whitespace/todo', 2,
				1456	'Too many spaces before TODO')
				1457
				1458	username = match.group(2)
				1459	if not username:
				1460	error(filename, linenum, 'readability/todo', 2,
				1461	'Missing username in TODO; it should look like '
				1462	'"// TODO(my_username): Stuff."')
				1463
				1464	middle_whitespace = match.group(3)
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1465	# Comparisons made explicit for correctness -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1466	if middle_whitespace != ' ' and middle_whitespace != '':
				1467	error(filename, linenum, 'whitespace/todo', 2,
				1468	'TODO(my_username) should be followed by a space')
				1469
				1470
				1471	def CheckSpacing(filename, clean_lines, linenum, error):
				1472	"""Checks for the correctness of various spacing issues in the code.
				1473
				1474	Things we check for: spaces around operators, spaces after
				1475	if/for/while/switch, no spaces around parens in function calls, two
				1476	spaces between code and comment, don't start a block with a blank
				1477	line, don't end a function with a blank line, don't have too many
				1478	blank lines in a row.
				1479
				1480	Args:
				1481	filename: The name of the current file.
				1482	clean_lines: A CleansedLines instance containing the file.
				1483	linenum: The number of the line to check.
				1484	error: The function to call with any errors found.
				1485	"""
				1486
				1487	raw = clean_lines.raw_lines
				1488	line = raw[linenum]
				1489
				1490	# Before nixing comments, check if the line is blank for no good
				1491	# reason. This includes the first line after a block is opened, and
				1492	# blank lines at the end of a function (ie, right before a line like '}'
				1493	if IsBlankLine(line):
				1494	elided = clean_lines.elided
				1495	prev_line = elided[linenum - 1]
				1496	prevbrace = prev_line.rfind('{')
				1497	# TODO(unknown): Don't complain if line before blank line, and line after,
				1498	# both start with alnums and are indented the same amount.
				1499	# This ignores whitespace at the start of a namespace block
				1500	# because those are not usually indented.
				1501	if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
				1502	and prev_line[:prevbrace].find('namespace') == -1):
				1503	# OK, we have a blank line at the start of a code block. Before we
				1504	# complain, we check if it is an exception to the rule: The previous
				1505	# non-empty line has the paramters of a function header that are indented
				1506	# 4 spaces (because they did not fit in a 80 column line when placed on
				1507	# the same line as the function name). We also check for the case where
				1508	# the previous line is indented 6 spaces, which may happen when the
				1509	# initializers of a constructor do not fit into a 80 column line.
				1510	exception = False
				1511	if Match(r' {6}\w', prev_line): # Initializer list?
				1512	# We are looking for the opening column of initializer list, which
				1513	# should be indented 4 spaces to cause 6 space indentation afterwards.
				1514	search_position = linenum-2
				1515	while (search_position >= 0
				1516	and Match(r' {6}\w', elided[search_position])):
				1517	search_position -= 1
				1518	exception = (search_position >= 0
				1519	and elided[search_position][:5] == ' :')
				1520	else:
				1521	# Search for the function arguments or an initializer list. We use a
				1522	# simple heuristic here: If the line is indented 4 spaces; and we have a
				1523	# closing paren, without the opening paren, followed by an opening brace
				1524	# or colon (for initializer lists) we assume that it is the last line of
				1525	# a function header. If we have a colon indented 4 spaces, it is an
				1526	# initializer list.
				1527	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				1528	prev_line)
				1529	or Match(r' {4}:', prev_line))
				1530
				1531	if not exception:
				1532	error(filename, linenum, 'whitespace/blank_line', 2,
				1533	'Blank line at the start of a code block. Is this needed?')
				1534	# This doesn't ignore whitespace at the end of a namespace block
				1535	# because that is too hard without pairing open/close braces;
				1536	# however, a special exception is made for namespace closing
				1537	# brackets which have a comment containing "namespace".
				1538	#
				1539	# Also, ignore blank lines at the end of a block in a long if-else
				1540	# chain, like this:
				1541	# if (condition1) {
				1542	# // Something followed by a blank line
				1543	#
				1544	# } else if (condition2) {
				1545	# // Something else
				1546	# }
				1547	if linenum + 1 < clean_lines.NumLines():
				1548	next_line = raw[linenum + 1]
				1549	if (next_line
				1550	and Match(r'\s*}', next_line)
				1551	and next_line.find('namespace') == -1
				1552	and next_line.find('} else ') == -1):
				1553	error(filename, linenum, 'whitespace/blank_line', 3,
				1554	'Blank line at the end of a code block. Is this needed?')
				1555
				1556	# Next, we complain if there's a comment too near the text
				1557	commentpos = line.find('//')
				1558	if commentpos != -1:
				1559	# Check if the // may be in quotes. If so, ignore it
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1560	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1561	if (line.count('"', 0, commentpos) -
				1562	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				1563	# Allow one space for new scopes, two spaces otherwise:
				1564	if (not Match(r'^\s*{ //', line) and
				1565	((commentpos >= 1 and
				1566	line[commentpos-1] not in string.whitespace) or
				1567	(commentpos >= 2 and
				1568	line[commentpos-2] not in string.whitespace))):
				1569	error(filename, linenum, 'whitespace/comments', 2,
				1570	'At least two spaces is best between code and comments')
				1571	# There should always be a space between the // and the comment
				1572	commentend = commentpos + 2
				1573	if commentend < len(line) and not line[commentend] == ' ':
				1574	# but some lines are exceptions -- e.g. if they're big
				1575	# comment delimiters like:
				1576	# //----------------------------------------------------------
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1577	# or they begin with multiple slashes followed by a space:
				1578	# //////// Header comment
				1579	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
				1580	Search(r'^/+ ', line[commentend:]))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1581	if not match:
				1582	error(filename, linenum, 'whitespace/comments', 4,
				1583	'Should have a space between // and comment')
				1584	CheckComment(line[commentpos:], filename, linenum, error)
				1585
				1586	line = clean_lines.elided[linenum] # get rid of comments and strings
				1587
				1588	# Don't try to do spacing checks for operator methods
				1589	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				1590
				1591	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				1592	# Otherwise not. Note we only check for non-spaces on both sides;
				1593	# sometimes people put non-spaces on one side when aligning ='s among
				1594	# many lines (not that this is behavior that I approve of...)
				1595	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				1596	error(filename, linenum, 'whitespace/operators', 4,
				1597	'Missing spaces around =')
				1598
				1599	# It's ok not to have spaces around binary operators like + - * /, but if
				1600	# there's too little whitespace, we get concerned. It's hard to tell,
				1601	# though, so we punt on this one for now. TODO.
				1602
				1603	# You should always have whitespace around binary operators.
				1604	# Alas, we can't test < or > because they're legitimately used sans spaces
				1605	# (a->b, vector<int> a). The only time we can tell is a < with no >, and
				1606	# only if it's not template params list spilling into the next line.
				1607	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
				1608	if not match:
				1609	# Note that while it seems that the '<[^<]*' term in the following
				1610	# regexp could be simplified to '<.*', which would indeed match
				1611	# the same class of strings, the [^<] means that searching for the
				1612	# regexp takes linear rather than quadratic time.
				1613	if not Search(r'<[^<],\s$', line): # template params spill
				1614	match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]\|->)*$', line)
				1615	if match:
				1616	error(filename, linenum, 'whitespace/operators', 3,
				1617	'Missing spaces around %s' % match.group(1))
				1618	# We allow no-spaces around << and >> when used like this: 10<<20, but
				1619	# not otherwise (particularly, not when used as streams)
				1620	match = Search(r'[^0-9\s](<<\|>>)[^0-9\s]', line)
				1621	if match:
				1622	error(filename, linenum, 'whitespace/operators', 3,
				1623	'Missing spaces around %s' % match.group(1))
				1624
				1625	# There shouldn't be space around unary operators
				1626	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				1627	if match:
				1628	error(filename, linenum, 'whitespace/operators', 4,
				1629	'Extra space for operator %s' % match.group(1))
				1630
				1631	# A pet peeve of mine: no spaces after an if, while, switch, or for
				1632	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				1633	if match:
				1634	error(filename, linenum, 'whitespace/parens', 5,
				1635	'Missing space before ( in %s' % match.group(1))
				1636
				1637	# For if/for/while/switch, the left and right parens should be
				1638	# consistent about how many spaces are inside the parens, and
				1639	# there should either be zero or one spaces inside the parens.
				1640	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1641	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1642	match = Search(r'\b(if\|for\|while\|switch)\s*'
				1643	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				1644	line)
				1645	if match:
				1646	if len(match.group(2)) != len(match.group(4)):
				1647	if not (match.group(3) == ';' and
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	1648	len(match.group(2)) == 1 + len(match.group(4)) or
				1649	not match.group(2) and Search(r'\bfor\s$.; $', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1650	error(filename, linenum, 'whitespace/parens', 5,
				1651	'Mismatching spaces inside () in %s' % match.group(1))
				1652	if not len(match.group(2)) in [0, 1]:
				1653	error(filename, linenum, 'whitespace/parens', 5,
				1654	'Should have zero or one spaces inside ( and ) in %s' %
				1655	match.group(1))
				1656
				1657	# You should always have a space after a comma (either as fn arg or operator)
				1658	if Search(r',[^\s]', line):
				1659	error(filename, linenum, 'whitespace/comma', 3,
				1660	'Missing space after ,')
				1661
				1662	# Next we will look for issues with function calls.
				1663	CheckSpacingForFunctionCall(filename, line, linenum, error)
				1664
				1665	# Except after an opening paren, you should have spaces before your braces.
				1666	# And since you should never have braces at the beginning of a line, this is
				1667	# an easy test.
				1668	if Search(r'[^ (]{', line):
				1669	error(filename, linenum, 'whitespace/braces', 5,
				1670	'Missing space before {')
				1671
				1672	# Make sure '} else {' has spaces.
				1673	if Search(r'}else', line):
				1674	error(filename, linenum, 'whitespace/braces', 5,
				1675	'Missing space before else')
				1676
				1677	# You shouldn't have spaces before your brackets, except maybe after
				1678	# 'delete []' or 'new char * []'.
				1679	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				1680	error(filename, linenum, 'whitespace/braces', 5,
				1681	'Extra space before [')
				1682
				1683	# You shouldn't have a space before a semicolon at the end of the line.
				1684	# There's a special case for "for" since the style guide allows space before
				1685	# the semicolon there.
				1686	if Search(r':\s;\s$', line):
				1687	error(filename, linenum, 'whitespace/semicolon', 5,
				1688	'Semicolon defining empty statement. Use { } instead.')
				1689	elif Search(r'^\s;\s$', line):
				1690	error(filename, linenum, 'whitespace/semicolon', 5,
				1691	'Line contains only semicolon. If this should be an empty statement, '
				1692	'use { } instead.')
				1693	elif (Search(r'\s+;\s*$', line) and
				1694	not Search(r'\bfor\b', line)):
				1695	error(filename, linenum, 'whitespace/semicolon', 5,
				1696	'Extra space before last semicolon. If this should be an empty '
				1697	'statement, use { } instead.')
				1698
				1699
				1700	def GetPreviousNonBlankLine(clean_lines, linenum):
				1701	"""Return the most recent non-blank line and its line number.
				1702
				1703	Args:
				1704	clean_lines: A CleansedLines instance containing the file contents.
				1705	linenum: The number of the line to check.
				1706
				1707	Returns:
				1708	A tuple with two elements. The first element is the contents of the last
				1709	non-blank line before the current line, or the empty string if this is the
				1710	first non-blank line. The second is the line number of that line, or -1
				1711	if this is the first non-blank line.
				1712	"""
				1713
				1714	prevlinenum = linenum - 1
				1715	while prevlinenum >= 0:
				1716	prevline = clean_lines.elided[prevlinenum]
				1717	if not IsBlankLine(prevline): # if not a blank line...
				1718	return (prevline, prevlinenum)
				1719	prevlinenum -= 1
				1720	return ('', -1)
				1721
				1722
				1723	def CheckBraces(filename, clean_lines, linenum, error):
				1724	"""Looks for misplaced braces (e.g. at the end of line).
				1725
				1726	Args:
				1727	filename: The name of the current file.
				1728	clean_lines: A CleansedLines instance containing the file.
				1729	linenum: The number of the line to check.
				1730	error: The function to call with any errors found.
				1731	"""
				1732
				1733	line = clean_lines.elided[linenum] # get rid of comments and strings
				1734
				1735	if Match(r'\s{\s$', line):
				1736	# We allow an open brace to start a line in the case where someone
				1737	# is using braces in a block to explicitly create a new scope,
				1738	# which is commonly used to control the lifetime of
				1739	# stack-allocated variables. We don't detect this perfectly: we
				1740	# just don't complain if the last non-whitespace character on the
				1741	# previous non-blank line is ';', ':', '{', or '}'.
				1742	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1743	if not Search(r'[;:}{]\s*$', prevline):
				1744	error(filename, linenum, 'whitespace/braces', 4,
				1745	'{ should almost always be at the end of the previous line')
				1746
				1747	# An else clause should be on the same line as the preceding closing brace.
				1748	if Match(r'\selse\s', line):
				1749	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1750	if Match(r'\s}\s$', prevline):
				1751	error(filename, linenum, 'whitespace/newline', 4,
				1752	'An else should appear on the same line as the preceding }')
				1753
				1754	# If braces come on one side of an else, they should be on both.
				1755	# However, we have to worry about "else if" that spans multiple lines!
				1756	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				1757	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				1758	# find the ( after the if
				1759	pos = line.find('else if')
				1760	pos = line.find('(', pos)
				1761	if pos > 0:
				1762	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				1763	if endline[endpos:].find('{') == -1: # must be brace after if
				1764	error(filename, linenum, 'readability/braces', 5,
				1765	'If an else has a brace on one side, it should have it on both')
				1766	else: # common case: else not followed by a multi-line if
				1767	error(filename, linenum, 'readability/braces', 5,
				1768	'If an else has a brace on one side, it should have it on both')
				1769
				1770	# Likewise, an else should never have the else clause on the same line
				1771	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				1772	error(filename, linenum, 'whitespace/newline', 4,
				1773	'Else clause should never be on same line as else (use 2 lines)')
				1774
				1775	# In the same way, a do/while should never be on one line
				1776	if Match(r'\s*do [^\s{]', line):
				1777	error(filename, linenum, 'whitespace/newline', 4,
				1778	'do/while clauses should not be on a single line')
				1779
				1780	# Braces shouldn't be followed by a ; unless they're defining a struct
				1781	# or initializing an array.
				1782	# We can't tell in general, but we can for some common cases.
				1783	prevlinenum = linenum
				1784	while True:
				1785	(prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
				1786	if Match(r'\s+{.}\s;', line) and not prevline.count(';'):
				1787	line = prevline + line
				1788	else:
				1789	break
				1790	if (Search(r'{.}\s;', line) and
				1791	line.count('{') == line.count('}') and
				1792	not Search(r'struct\|class\|enum\|\s=\s{', line)):
				1793	error(filename, linenum, 'readability/braces', 4,
				1794	"You don't need a ; after a }")
				1795
				1796
				1797	def ReplaceableCheck(operator, macro, line):
				1798	"""Determine whether a basic CHECK can be replaced with a more specific one.
				1799
				1800	For example suggest using CHECK_EQ instead of CHECK(a == b) and
				1801	similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
				1802
				1803	Args:
				1804	operator: The C++ operator used in the CHECK.
				1805	macro: The CHECK or EXPECT macro being called.
				1806	line: The current source line.
				1807
				1808	Returns:
				1809	True if the CHECK can be replaced with a more specific one.
				1810	"""
				1811
				1812	# This matches decimal and hex integers, strings, and chars (in that order).
				1813	match_constant = r'([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')'
				1814
				1815	# Expression to match two sides of the operator with something that
				1816	# looks like a literal, since CHECK(x == iterator) won't compile.
				1817	# This means we can't catch all the cases where a more specific
				1818	# CHECK is possible, but it's less annoying than dealing with
				1819	# extraneous warnings.
				1820	match_this = (r'\s' + macro + r'\((\s' +
				1821	match_constant + r'\s' + operator + r'[^<>].\|'
				1822	r'.[^<>]' + operator + r'\s' + match_constant +
				1823	r'\s*\))')
				1824
				1825	# Don't complain about CHECK(x == NULL) or similar because
				1826	# CHECK_EQ(x, NULL) won't compile (requires a cast).
				1827	# Also, don't complain about more complex boolean expressions
				1828	# involving && or \|\| such as CHECK(a == b \|\| c == d).
				1829	return Match(match_this, line) and not Search(r'NULL\|&&\|\\|\\|', line)
				1830
				1831
				1832	def CheckCheck(filename, clean_lines, linenum, error):
				1833	"""Checks the use of CHECK and EXPECT macros.
				1834
				1835	Args:
				1836	filename: The name of the current file.
				1837	clean_lines: A CleansedLines instance containing the file.
				1838	linenum: The number of the line to check.
				1839	error: The function to call with any errors found.
				1840	"""
				1841
				1842	# Decide the set of replacement macros that should be suggested
				1843	raw_lines = clean_lines.raw_lines
				1844	current_macro = ''
				1845	for macro in _CHECK_MACROS:
				1846	if raw_lines[linenum].find(macro) >= 0:
				1847	current_macro = macro
				1848	break
				1849	if not current_macro:
				1850	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				1851	return
				1852
				1853	line = clean_lines.elided[linenum] # get rid of comments and strings
				1854
				1855	# Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
				1856	for operator in ['==', '!=', '>=', '>', '<=', '<']:
				1857	if ReplaceableCheck(operator, current_macro, line):
				1858	error(filename, linenum, 'readability/check', 2,
				1859	'Consider using %s instead of %s(a %s b)' % (
				1860	_CHECK_REPLACEMENT[current_macro][operator],
				1861	current_macro, operator))
				1862	break
				1863
				1864
				1865	def GetLineWidth(line):
				1866	"""Determines the width of the line in column positions.
				1867
				1868	Args:
				1869	line: A string, which may be a Unicode string.
				1870
				1871	Returns:
				1872	The width of the line in column positions, accounting for Unicode
				1873	combining characters and wide characters.
				1874	"""
				1875	if isinstance(line, unicode):
				1876	width = 0
				1877	for c in unicodedata.normalize('NFC', line):
				1878	if unicodedata.east_asian_width(c) in ('W', 'F'):
				1879	width += 2
				1880	elif not unicodedata.combining(c):
				1881	width += 1
				1882	return width
				1883	else:
				1884	return len(line)
				1885
				1886
				1887	def CheckStyle(filename, clean_lines, linenum, file_extension, error):
				1888	"""Checks rules from the 'C++ style rules' section of cppguide.html.
				1889
				1890	Most of these rules are hard to test (naming, comment style), but we
				1891	do what we can. In particular we check for 2-space indents, line lengths,
				1892	tab usage, spaces inside code, etc.
				1893
				1894	Args:
				1895	filename: The name of the current file.
				1896	clean_lines: A CleansedLines instance containing the file.
				1897	linenum: The number of the line to check.
				1898	file_extension: The extension (without the dot) of the filename.
				1899	error: The function to call with any errors found.
				1900	"""
				1901
				1902	raw_lines = clean_lines.raw_lines
				1903	line = raw_lines[linenum]
				1904
				1905	if line.find('\t') != -1:
				1906	error(filename, linenum, 'whitespace/tab', 1,
				1907	'Tab found; better to use spaces')
				1908
				1909	# One or three blank spaces at the beginning of the line is weird; it's
				1910	# hard to reconcile that with 2-space indents.
				1911	# NOTE: here are the conditions rob pike used for his tests. Mine aren't
				1912	# as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
				1913	# if(RLENGTH > 20) complain = 0;
				1914	# if(match($0, " +(error\|private\|public\|protected):")) complain = 0;
				1915	# if(match(prev, "&& *$")) complain = 0;
				1916	# if(match(prev, "\\\|\\\| *$")) complain = 0;
				1917	# if(match(prev, "[\",=><] *$")) complain = 0;
				1918	# if(match($0, " <<")) complain = 0;
				1919	# if(match(prev, " +for \\(")) complain = 0;
				1920	# if(prevodd && match(prevprev, " +for \\(")) complain = 0;
				1921	initial_spaces = 0
				1922	cleansed_line = clean_lines.elided[linenum]
				1923	while initial_spaces < len(line) and line[initial_spaces] == ' ':
				1924	initial_spaces += 1
				1925	if line and line[-1].isspace():
				1926	error(filename, linenum, 'whitespace/end_of_line', 4,
				1927	'Line ends in whitespace. Consider deleting these extra spaces.')
				1928	# There are certain situations we allow one space, notably for labels
				1929	elif ((initial_spaces == 1 or initial_spaces == 3) and
				1930	not Match(r'\s\w+\s:\s*$', cleansed_line)):
				1931	error(filename, linenum, 'whitespace/indent', 3,
				1932	'Weird number of spaces at line-start. '
				1933	'Are you using a 2-space indent?')
				1934	# Labels should always be indented at least one space.
				1935	elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
				1936	line):
				1937	error(filename, linenum, 'whitespace/labels', 4,
				1938	'Labels should always be indented at least one space. '
				1939	'If this is a member-initializer list in a constructor, '
				1940	'the colon should be on the line after the definition header.')
				1941
				1942	# Check if the line is a header guard.
				1943	is_header_guard = False
				1944	if file_extension == 'h':
				1945	cppvar = GetHeaderGuardCPPVariable(filename)
				1946	if (line.startswith('#ifndef %s' % cppvar) or
				1947	line.startswith('#define %s' % cppvar) or
				1948	line.startswith('#endif // %s' % cppvar)):
				1949	is_header_guard = True
				1950	# #include lines and header guards can be long, since there's no clean way to
				1951	# split them.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1952	#
				1953	# URLs can be long too. It's possible to split these, but it makes them
				1954	# harder to cut&paste.
				1955	if (not line.startswith('#include') and not is_header_guard and
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1956	not Match(r'^\s//.http(s?)://\S*$', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1957	line_width = GetLineWidth(line)
				1958	if line_width > 100:
				1959	error(filename, linenum, 'whitespace/line_length', 4,
				1960	'Lines should very rarely be longer than 100 characters')
				1961	elif line_width > 80:
				1962	error(filename, linenum, 'whitespace/line_length', 2,
				1963	'Lines should be <= 80 characters long')
				1964
				1965	if (cleansed_line.count(';') > 1 and
				1966	# for loops are allowed two ;'s (and may run over two lines).
				1967	cleansed_line.find('for') == -1 and
				1968	(GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
				1969	GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
				1970	# It's ok to have many commands in a switch case that fits in 1 line
				1971	not ((cleansed_line.find('case ') != -1 or
				1972	cleansed_line.find('default:') != -1) and
				1973	cleansed_line.find('break;') != -1)):
				1974	error(filename, linenum, 'whitespace/newline', 4,
				1975	'More than one command on the same line')
				1976
				1977	# Some more style checks
				1978	CheckBraces(filename, clean_lines, linenum, error)
				1979	CheckSpacing(filename, clean_lines, linenum, error)
				1980	CheckCheck(filename, clean_lines, linenum, error)
				1981
				1982
				1983	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				1984	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				1985	# Matches the first component of a filename delimited by -s and _s. That is:
				1986	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				1987	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				1988	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				1989	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				1990	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				1991
				1992
				1993	def _DropCommonSuffixes(filename):
				1994	"""Drops common suffixes like _test.cc or -inl.h from filename.
				1995
				1996	For example:
				1997	>>> _DropCommonSuffixes('foo/foo-inl.h')
				1998	'foo/foo'
				1999	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				2000	'foo/bar/foo'
				2001	>>> _DropCommonSuffixes('foo/foo_internal.h')
				2002	'foo/foo'
				2003	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				2004	'foo/foo_unusualinternal'
				2005
				2006	Args:
				2007	filename: The input filename.
				2008
				2009	Returns:
				2010	The filename with the common suffix removed.
				2011	"""
				2012	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				2013	'inl.h', 'impl.h', 'internal.h'):
				2014	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				2015	filename[-len(suffix) - 1] in ('-', '_')):
				2016	return filename[:-len(suffix) - 1]
				2017	return os.path.splitext(filename)[0]
				2018
				2019
				2020	def _IsTestFilename(filename):
				2021	"""Determines if the given filename has a suffix that identifies it as a test.
				2022
				2023	Args:
				2024	filename: The input filename.
				2025
				2026	Returns:
				2027	True if 'filename' looks like a test, False otherwise.
				2028	"""
				2029	if (filename.endswith('_test.cc') or
				2030	filename.endswith('_unittest.cc') or
				2031	filename.endswith('_regtest.cc')):
				2032	return True
				2033	else:
				2034	return False
				2035
				2036
				2037	def _ClassifyInclude(fileinfo, include, is_system):
				2038	"""Figures out what kind of header 'include' is.
				2039
				2040	Args:
				2041	fileinfo: The current file cpplint is running over. A FileInfo instance.
				2042	include: The path to a #included file.
				2043	is_system: True if the #include used <> rather than "".
				2044
				2045	Returns:
				2046	One of the _XXX_HEADER constants.
				2047
				2048	For example:
				2049	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				2050	_C_SYS_HEADER
				2051	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				2052	_CPP_SYS_HEADER
				2053	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				2054	_LIKELY_MY_HEADER
				2055	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				2056	... 'bar/foo_other_ext.h', False)
				2057	_POSSIBLE_MY_HEADER
				2058	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				2059	_OTHER_HEADER
				2060	"""
				2061	# This is a list of all standard c++ header files, except
				2062	# those already checked for above.
				2063	is_stl_h = include in _STL_HEADERS
				2064	is_cpp_h = is_stl_h or include in _CPP_HEADERS
				2065
				2066	if is_system:
				2067	if is_cpp_h:
				2068	return _CPP_SYS_HEADER
				2069	else:
				2070	return _C_SYS_HEADER
				2071
				2072	# If the target file and the include we're checking share a
				2073	# basename when we drop common extensions, and the include
				2074	# lives in . , then it's likely to be owned by the target file.
				2075	target_dir, target_base = (
				2076	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				2077	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				2078	if target_base == include_base and (
				2079	include_dir == target_dir or
				2080	include_dir == os.path.normpath(target_dir + '/../public')):
				2081	return _LIKELY_MY_HEADER
				2082
				2083	# If the target and include share some initial basename
				2084	# component, it's possible the target is implementing the
				2085	# include, so it's allowed to be first, but we'll never
				2086	# complain if it's not there.
				2087	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				2088	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				2089	if (target_first_component and include_first_component and
				2090	target_first_component.group(0) ==
				2091	include_first_component.group(0)):
				2092	return _POSSIBLE_MY_HEADER
				2093
				2094	return _OTHER_HEADER
				2095
				2096
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2097
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from the elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Three things are checked: that the include names a directory, that the
  same file is not included twice, and that includes appear in the expected
  order. Includes of stream headers are reported separately.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are
      inserted. It is indexed by include path and updated in place.
    error: The function to call with any errors found. It is invoked as
      error(filename, linenum, category, level, message).
  """
  fileinfo = FileInfo(filename)

  # Use the non-elided line: the quoted include path must still be present.
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # We shouldn't include a file more than once. Actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    # The trailing empty alternative lets a bare "stream" match as well.
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')
				2162
				2163	def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
				2164	error):
				2165	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				2166
				2167	Some of these rules are hard to test (function overloading, using
				2168	uint32 inappropriately), but we do the best we can.
				2169
				2170	Args:
				2171	filename: The name of the current file.
				2172	clean_lines: A CleansedLines instance containing the file.
				2173	linenum: The number of the line to check.
				2174	file_extension: The extension (without the dot) of the filename.
				2175	include_state: An _IncludeState instance in which the headers are inserted.
				2176	error: The function to call with any errors found.
				2177	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2178	# If the line is empty or consists of entirely a comment, no need to
				2179	# check it.
				2180	line = clean_lines.elided[linenum]
				2181	if not line:
				2182	return
				2183
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2184	match = _RE_PATTERN_INCLUDE.search(line)
				2185	if match:
				2186	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				2187	return
				2188
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2189	# Create an extended_line, which is the concatenation of the current and
				2190	# next lines, for more effective checking of code that may span more than one
				2191	# line.
				2192	if linenum + 1 < clean_lines.NumLines():
				2193	extended_line = line + clean_lines.elided[linenum + 1]
				2194	else:
				2195	extended_line = line
				2196
				2197	# Make Windows paths like Unix.
				2198	fullname = os.path.abspath(filename).replace('\\', '/')
				2199
				2200	# TODO(unknown): figure out if they're using default arguments in fn proto.
				2201
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2202	# Check for non-const references in functions. This is tricky because &
				2203	# is also used to take the address of something. We allow <> for templates,
				2204	# (ignoring whatever is between the braces) and : for classes.
				2205	# These are complicated re's. They try to capture the following:
				2206	# paren (for fn-prototype start), typename, &, varname. For the const
				2207	# version, we're willing for const to be before typename or after
				2208	# Don't check the implemention on same line.
				2209	fnline = line.split('{', 1)[0]
				2210	if (len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+(\s?&\|&\s?)\w+', fnline)) >
				2211	len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
				2212	r'(?:[\w:]\|<[^()]*>)+(\s?&\|&\s?)\w+', fnline)) +
				2213	len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+\s+const(\s?&\|&\s?)[\w]+',
				2214	fnline))):
				2215
				2216	# We allow non-const references in a few standard places, like functions
				2217	# called "swap()" or iostream operators like "<<" or ">>".
				2218	if not Search(
				2219	r'(swap\|Swap\|operator[<>][<>])\s\(\s(?:[\w:]\|<.>)+\s&',
				2220	fnline):
				2221	error(filename, linenum, 'runtime/references', 2,
				2222	'Is this a non-const reference? '
				2223	'If so, make const or use a pointer.')
				2224
				2225	# Check to see if they're using an conversion function cast.
				2226	# I just try to capture the most common basic types, though there are more.
				2227	# Parameterless conversion functions, such as bool(), are allowed as they are
				2228	# probably a member operator declaration or default constructor.
				2229	match = Search(
				2230	r'\b(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)\([^)]', line)
				2231	if match:
				2232	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				2233	# where type may be float(), int(string), etc. Without context they are
				2234	# virtually indistinguishable from int(x) casts.
				2235	if not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
				2236	error(filename, linenum, 'readability/casting', 4,
				2237	'Using deprecated casting style. '
				2238	'Use static_cast<%s>(...) instead' %
				2239	match.group(1))
				2240
				2241	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2242	'static_cast',
				2243	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$',
				2244	error)
				2245	# This doesn't catch all cases. Consider (const char * const)"hello".
				2246	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2247	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
				2248
				2249	# In addition, we look for people taking the address of a cast. This
				2250	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				2251	# point where you think.
				2252	if Search(
				2253	r'(&$[^)]+$[\w(])\|(&(static\|dynamic\|reinterpret)_cast\b)', line):
				2254	error(filename, linenum, 'runtime/casting', 4,
				2255	('Are you taking an address of a cast? '
				2256	'This is dangerous: could be a temp var. '
				2257	'Take the address before doing the cast, rather than after'))
				2258
				2259	# Check for people declaring static/global STL strings at the top level.
				2260	# This is dangerous because the C++ language does not guarantee that
				2261	# globals with constructors are initialized before the first access.
				2262	match = Match(
				2263	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				2264	line)
				2265	# Make sure it's not a function.
				2266	# Function template specialization looks like: "string foo<Type>(...".
				2267	# Class template definitions look like: "string Foo<Type>::Method(...".
				2268	if match and not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)',
				2269	match.group(3)):
				2270	error(filename, linenum, 'runtime/string', 4,
				2271	'For a static/global string constant, use a C style string instead: '
				2272	'"%schar %s[]".' %
				2273	(match.group(1), match.group(2)))
				2274
				2275	# Check that we're not using RTTI outside of testing code.
				2276	if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
				2277	error(filename, linenum, 'runtime/rtti', 5,
				2278	'Do not use dynamic_cast<>. If you need to cast within a class '
				2279	"hierarchy, use static_cast<> to upcast. Google doesn't support "
				2280	'RTTI.')
				2281
				2282	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				2283	error(filename, linenum, 'runtime/init', 4,
				2284	'You seem to be initializing a member variable with itself.')
				2285
				2286	if file_extension == 'h':
				2287	# TODO(unknown): check that 1-arg constructors are explicit.
				2288	# How to tell it's a constructor?
				2289	# (handled in CheckForNonStandardConstructs for now)
				2290	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				2291	# (level 1 error)
				2292	pass
				2293
				2294	# Check if people are using the verboten C basic types. The only exception
				2295	# we regularly allow is "unsigned short port" for port.
				2296	if Search(r'\bshort port\b', line):
				2297	if not Search(r'\bunsigned short port\b', line):
				2298	error(filename, linenum, 'runtime/int', 4,
				2299	'Use "unsigned short" for ports, not "short"')
				2300	else:
				2301	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				2302	if match:
				2303	error(filename, linenum, 'runtime/int', 4,
				2304	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				2305
				2306	# When snprintf is used, the second argument shouldn't be a literal.
				2307	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
				2308	if match:
				2309	error(filename, linenum, 'runtime/printf', 3,
				2310	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				2311	'to snprintf.' % (match.group(1), match.group(2)))
				2312
				2313	# Check if some verboten C functions are being used.
				2314	if Search(r'\bsprintf\b', line):
				2315	error(filename, linenum, 'runtime/printf', 5,
				2316	'Never use sprintf. Use snprintf instead.')
				2317	match = Search(r'\b(strcpy\|strcat)\b', line)
				2318	if match:
				2319	error(filename, linenum, 'runtime/printf', 4,
				2320	'Almost always, snprintf is better than %s' % match.group(1))
				2321
				2322	if Search(r'\bsscanf\b', line):
				2323	error(filename, linenum, 'runtime/printf', 1,
				2324	'sscanf can be ok, but is slow and can overflow buffers.')
				2325
				2326	# Check for suspicious usage of "if" like
				2327	# } if (a == b) {
				2328	if Search(r'\}\sif\s\(', line):
				2329	error(filename, linenum, 'readability/braces', 4,
				2330	'Did you mean "else if"? If not, start a new line for "if".')
				2331
				2332	# Check for potential format string bugs like printf(foo).
				2333	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				2334	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
				2335	match = re.search(r'\b((?:string)?printf)\s*$([\w.\->()]+)$', line, re.I)
				2336	if match:
				2337	error(filename, linenum, 'runtime/printf', 4,
				2338	'Potential format string bug. Do %s("%%s", %s) instead.'
				2339	% (match.group(1), match.group(2)))
				2340
				2341	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				2342	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				2343	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				2344	error(filename, linenum, 'runtime/memset', 4,
				2345	'Did you mean "memset(%s, 0, %s)"?'
				2346	% (match.group(1), match.group(2)))
				2347
				2348	if Search(r'\busing namespace\b', line):
				2349	error(filename, linenum, 'build/namespaces', 5,
				2350	'Do not use namespace using-directives. '
				2351	'Use using-declarations instead.')
				2352
				2353	# Detect variable-length arrays.
				2354	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				2355	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				2356	match.group(3).find(']') == -1):
				2357	# Split the size using space and arithmetic operators as delimiters.
				2358	# If any of the resulting tokens are not compile time constants then
				2359	# report the error.
				2360	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				2361	is_const = True
				2362	skip_next = False
				2363	for tok in tokens:
				2364	if skip_next:
				2365	skip_next = False
				2366	continue
				2367
				2368	if Search(r'sizeof$.+$', tok): continue
				2369	if Search(r'arraysize$\w+$', tok): continue
				2370
				2371	tok = tok.lstrip('(')
				2372	tok = tok.rstrip(')')
				2373	if not tok: continue
				2374	if Match(r'\d+', tok): continue
				2375	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				2376	if Match(r'k[A-Z0-9]\w*', tok): continue
				2377	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				2378	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				2379	# A catch all for tricky sizeof cases, including 'sizeof expression',
				2380	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
				2381	# requires skipping the next token becasue we split on ' ' and '*'.
				2382	if tok.startswith('sizeof'):
				2383	skip_next = True
				2384	continue
				2385	is_const = False
				2386	break
				2387	if not is_const:
				2388	error(filename, linenum, 'runtime/arrays', 1,
				2389	'Do not use variable-length arrays. Use an appropriately named '
				2390	"('k' followed by CamelCase) compile-time constant for the size.")
				2391
				2392	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				2393	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				2394	# in the class declaration.
				2395	match = Match(
				2396	(r'\s*'
				2397	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				2398	r'$.*$;$'),
				2399	line)
				2400	if match and linenum + 1 < clean_lines.NumLines():
				2401	next_line = clean_lines.elided[linenum + 1]
				2402	if not Search(r'^\s*};', next_line):
				2403	error(filename, linenum, 'readability/constructors', 3,
				2404	match.group(1) + ' should be the last thing in the class')
				2405
				2406	# Check for use of unnamed namespaces in header files. Registration
				2407	# macros are typically OK, so we allow use of "namespace {" on lines
				2408	# that end with backslashes.
				2409	if (file_extension == 'h'
				2410	and Search(r'\bnamespace\s*{', line)
				2411	and line[-1] != '\\'):
				2412	error(filename, linenum, 'build/namespaces', 4,
				2413	'Do not use unnamed namespaces in header files. See '
				2414	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				2415	' for more information.')
				2416
				2417
				2418	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				2419	error):
				2420	"""Checks for a C-style cast by looking for the pattern.
				2421
				2422	This also handles sizeof(type) warnings, due to similarity of content.
				2423
				2424	Args:
				2425	filename: The name of the current file.
				2426	linenum: The number of the line to check.
				2427	line: The line of code to check.
				2428	raw_line: The raw line of code to check, with comments.
				2429	cast_type: The string for the C++ cast to recommend. This is either
				2430	reinterpret_cast or static_cast, depending.
				2431	pattern: The regular expression used to find C-style casts.
				2432	error: The function to call with any errors found.
				2433	"""
				2434	match = Search(pattern, line)
				2435	if not match:
				2436	return
				2437
				2438	# e.g., sizeof(int)
				2439	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				2440	if sizeof_match:
				2441	error(filename, linenum, 'runtime/sizeof', 1,
				2442	'Using sizeof(type). Use sizeof(varname) instead if possible')
				2443	return
				2444
				2445	remainder = line[match.end(0):]
				2446
				2447	# The close paren is for function pointers as arguments to a function.
				2448	# eg, void foo(void (*bar)(int));
				2449	# The semicolon check is a more basic function check; also possibly a
				2450	# function pointer typedef.
				2451	# eg, void foo(int); or void foo(int) const;
				2452	# The equals check is for function pointer assignment.
				2453	# eg, void (foo)(int) = ...
				2454	#
				2455	# Right now, this will only catch cases where there's a single argument, and
				2456	# it's unnamed. It should probably be expanded to check for multiple
				2457	# arguments with some unnamed.
				2458	function_match = Match(r'\s(\)\|=\|(const)?\s(;\|\{\|throw))', remainder)
				2459	if function_match:
				2460	if (not function_match.group(3) or
				2461	function_match.group(3) == ';' or
				2462	raw_line.find('/*') < 0):
				2463	error(filename, linenum, 'readability/function', 3,
				2464	'All parameters should be named in a function')
				2465	return
				2466
				2467	# At this point, all that should be left is actual casts.
				2468	error(filename, linenum, 'readability/casting', 4,
				2469	'Using C-style cast. Use %s<%s>(...) instead' %
				2470	(cast_type, match.group(1)))
				2471
				2472
# Pairs of (header, names): each name is a template that, when followed by
# '<' in the code, is a reason to include that header.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a required entity to headers whose presence is accepted in place of
# the header that would otherwise be requested for it.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the bare word "string", which is a non-templatized STL type.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				2516
				2517	_re_pattern_algorithm_header = []
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2518	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				2519	'transform'):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2520	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				2521	# type::max().
				2522	_re_pattern_algorithm_header.append(
				2523	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				2524	_template,
				2525	'<algorithm>'))
				2526
				2527	_re_pattern_templates = []
				2528	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				2529	for _template in _templates:
				2530	_re_pattern_templates.append(
				2531	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				2532	_template + '<>',
				2533	_header))
				2534
				2535
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
      Always '' when the bool is False.
  """

  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  if filename_cc.endswith('_unittest'):
    filename_cc = filename_cc[:-len('_unittest')]
  elif filename_cc.endswith('_test'):
    filename_cc = filename_cc[:-len('_test')]
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  files_belong_to_same_module = filename_cc.endswith(filename_h)
  common_path = ''
  if files_belong_to_same_module:
    # Slice by explicit length: s[:-len(t)] collapses to '' when t is empty,
    # because s[:-0] is s[:0].
    common_path = filename_cc[:len(filename_cc) - len(filename_h)]
  return files_belong_to_same_module, common_path
				2589
				2590
				2591	def UpdateIncludeState(filename, include_state, io=codecs):
				2592	"""Fill up the include_state with new includes found from the file.
				2593
				2594	Args:
				2595	filename: the name of the header to read.
				2596	include_state: an _IncludeState instance in which the headers are inserted.
				2597	io: The io factory to use to read the file. Provided for testability.
				2598
				2599	Returns:
				2600	True if a header was succesfully added. False otherwise.
				2601	"""
				2602	headerfile = None
				2603	try:
				2604	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				2605	except IOError:
				2606	return False
				2607	linenum = 0
				2608	for line in headerfile:
				2609	linenum += 1
				2610	clean_line = CleanseComments(line)
				2611	match = _RE_PATTERN_INCLUDE.search(clean_line)
				2612	if match:
				2613	include = match.group(2)
				2614	# The value formatting is cute, but not really used right now.
				2615	# What matters here is that the key is in include_state.
				2616	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				2617	return True
				2618
				2619
				2620	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				2621	io=codecs):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2622	"""Reports for missing stl includes.
				2623
				2624	This function will output warnings to make sure you are including the headers
				2625	necessary for the stl containers and functions that you use. We only give one
				2626	reason to include a header. For example, if you use both equal_to<> and
				2627	less<> in a .h file, only one (the latter in the file) of these will be
				2628	reported as a reason to include the <functional>.
				2629
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2630	Args:
				2631	filename: The name of the current file.
				2632	clean_lines: A CleansedLines instance containing the file.
				2633	include_state: An _IncludeState instance.
				2634	error: The function to call with any errors found.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2635	io: The IO factory to use to read the header file. Provided for unittest
				2636	injection.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2637	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2638	required = {} # A map of header name to linenumber and the template entity.
				2639	# Example of required: { '<functional>': (1219, 'less<>') }
				2640
				2641	for linenum in xrange(clean_lines.NumLines()):
				2642	line = clean_lines.elided[linenum]
				2643	if not line or line[0] == '#':
				2644	continue
				2645
				2646	# String is special -- it is a non-templatized type in STL.
				2647	if _RE_PATTERN_STRING.search(line):
				2648	required['<string>'] = (linenum, 'string')
				2649
				2650	for pattern, template, header in _re_pattern_algorithm_header:
				2651	if pattern.search(line):
				2652	required[header] = (linenum, template)
				2653
				2654	# The following function is just a speed up, no semantics are changed.
				2655	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				2656	continue
				2657
				2658	for pattern, template, header in _re_pattern_templates:
				2659	if pattern.search(line):
				2660	required[header] = (linenum, template)
				2661
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2662	# The policy is that if you #include something in foo.h you don't need to
				2663	# include it again in foo.cc. Here, we will look at possible includes.
				2664	# Let's copy the include_state so it is only messed up within this function.
				2665	include_state = include_state.copy()
				2666
				2667	# Did we find the header for this file (if any) and succesfully load it?
				2668	header_found = False
				2669
				2670	# Use the absolute path so that matching works properly.
				2671	abs_filename = os.path.abspath(filename)
				2672
				2673	# For Emacs's flymake.
				2674	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				2675	# by flymake and that file name might end with '_flymake.cc'. In that case,
				2676	# restore original file name here so that the corresponding header file can be
				2677	# found.
				2678	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				2679	# instead of 'foo_flymake.h'
				2680	emacs_flymake_suffix = '_flymake.cc'
				2681	if abs_filename.endswith(emacs_flymake_suffix):
				2682	abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cc'
				2683
				2684	# include_state is modified during iteration, so we iterate over a copy of
				2685	# the keys.
				2686	for header in include_state.keys(): #NOLINT
				2687	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				2688	fullpath = common_path + header
				2689	if same_module and UpdateIncludeState(fullpath, include_state, io):
				2690	header_found = True
				2691
				2692	# If we can't find the header file for a .cc, assume it's because we don't
				2693	# know where to look. In that case we'll give up as we're not sure they
				2694	# didn't include it in the .h file.
				2695	# TODO(unknown): Do a better job of finding .h files so we are confident that
				2696	# not having the .h file means there isn't one.
				2697	if filename.endswith('.cc') and not header_found:
				2698	return
				2699
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2700	# All the lines have been processed, report the errors found.
				2701	for required_header_unstripped in required:
				2702	template = required[required_header_unstripped][1]
				2703	if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
				2704	headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
				2705	if [True for header in headers if header in include_state]:
				2706	continue
				2707	if required_header_unstripped.strip('<>"') not in include_state:
				2708	error(filename, required[required_header_unstripped][0],
				2709	'build/include_what_you_use', 4,
				2710	'Add #include ' + required_header_unstripped + ' for ' + template)
				2711
				2712
				2713	def ProcessLine(filename, file_extension,
				2714	clean_lines, line, include_state, function_state,
				2715	class_state, error):
				2716	"""Processes a single line in the file.
				2717
				2718	Args:
				2719	filename: Filename of the file that is being processed.
				2720	file_extension: The extension (dot not included) of the file.
				2721	clean_lines: An array of strings, each representing a line of the file,
				2722	with comments stripped.
				2723	line: Number of line being processed.
				2724	include_state: An _IncludeState instance in which the headers are inserted.
				2725	function_state: A _FunctionState instance which counts function lines, etc.
				2726	class_state: A _ClassState instance which maintains information about
				2727	the current stack of nested class declarations being parsed.
				2728	error: A callable to which errors are reported, which takes 4 arguments:
				2729	filename, line number, error level, and message
				2730
				2731	"""
				2732	raw_lines = clean_lines.raw_lines
				2733	CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
				2734	if Search(r'\bNOLINT\b', raw_lines[line]): # ignore nolint lines
				2735	return
				2736	CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
				2737	CheckStyle(filename, clean_lines, line, file_extension, error)
				2738	CheckLanguage(filename, clean_lines, line, file_extension, include_state,
				2739	error)
				2740	CheckForNonStandardConstructs(filename, clean_lines, line,
				2741	class_state, error)
				2742	CheckPosixThreading(filename, clean_lines, line, error)
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2743	CheckInvalidIncrement(filename, clean_lines, line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2744
				2745
				2746	def ProcessFileData(filename, file_extension, lines, error):
				2747	"""Performs lint checks and reports any errors to the given error function.
				2748
				2749	Args:
				2750	filename: Filename of the file that is being processed.
				2751	file_extension: The extension (dot not included) of the file.
				2752	lines: An array of strings, each representing a line of the file, with the
				2753	last element being empty if the file is termined with a newline.
				2754	error: A callable to which errors are reported, which takes 4 arguments:
				2755	"""
				2756	lines = (['// marker so line numbers and indices both start at 1'] + lines +
				2757	['// marker so line numbers end in a known way'])
				2758
				2759	include_state = _IncludeState()
				2760	function_state = _FunctionState()
				2761	class_state = _ClassState()
				2762
				2763	CheckForCopyright(filename, lines, error)
				2764
				2765	if file_extension == 'h':
				2766	CheckForHeaderGuard(filename, lines, error)
				2767
				2768	RemoveMultiLineComments(filename, lines, error)
				2769	clean_lines = CleansedLines(lines)
				2770	for line in xrange(clean_lines.NumLines()):
				2771	ProcessLine(filename, file_extension, clean_lines, line,
				2772	include_state, function_state, class_state, error)
				2773	class_state.CheckFinished(filename, error)
				2774
				2775	CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
				2776
				2777	# We check here rather than inside ProcessLine so that we see raw
				2778	# lines rather than "cleaned" lines.
				2779	CheckForUnicodeReplacementCharacters(filename, lines, error)
				2780
				2781	CheckForNewlineAtEOF(filename, lines, error)
				2782
				2783
				2784	def ProcessFile(filename, vlevel):
				2785	"""Does google-lint on a single file.
				2786
				2787	Args:
				2788	filename: The name of the file to parse.
				2789
				2790	vlevel: The level of errors to report. Every error of confidence
				2791	>= verbose_level will be reported. 0 is a good default.
				2792	"""
				2793
				2794	_SetVerboseLevel(vlevel)
				2795
				2796	try:
				2797	# Support the UNIX convention of using "-" for stdin. Note that
				2798	# we are not opening the file with universal newline support
				2799	# (which codecs doesn't support anyway), so the resulting lines do
				2800	# contain trailing '\r' characters if we are reading a file that
				2801	# has CRLF endings.
				2802	# If after the split a trailing '\r' is present, it is removed
				2803	# below. If it is not expected to be present (i.e. os.linesep !=
				2804	# '\r\n' as in Windows), a warning is issued below if this file
				2805	# is processed.
				2806
				2807	if filename == '-':
				2808	lines = codecs.StreamReaderWriter(sys.stdin,
				2809	codecs.getreader('utf8'),
				2810	codecs.getwriter('utf8'),
				2811	'replace').read().split('\n')
				2812	else:
				2813	lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
				2814
				2815	carriage_return_found = False
				2816	# Remove trailing '\r'.
				2817	for linenum in range(len(lines)):
				2818	if lines[linenum].endswith('\r'):
				2819	lines[linenum] = lines[linenum].rstrip('\r')
				2820	carriage_return_found = True
				2821
				2822	except IOError:
				2823	sys.stderr.write(
				2824	"Skipping input '%s': Can't open for reading\n" % filename)
				2825	return
				2826
				2827	# Note, if no dot is found, this will give the entire filename as the ext.
				2828	file_extension = filename[filename.rfind('.') + 1:]
				2829
				2830	# When reading from stdin, the extension is unknown, so no cpplint tests
				2831	# should rely on the extension.
				2832	if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
				2833	and file_extension != 'cpp'):
				2834	sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
				2835	else:
				2836	ProcessFileData(filename, file_extension, lines, Error)
				2837	if carriage_return_found and os.linesep != '\r\n':
				2838	# Use 0 for linenum since outputing only one error for potentially
				2839	# several lines.
				2840	Error(filename, 0, 'whitespace/newline', 1,
				2841	'One or more unexpected \\r (^M) found;'
				2842	'better to use only a \\n')
				2843
				2844	sys.stderr.write('Done processing %s\n' % filename)
				2845
				2846
				2847	def PrintUsage(message):
				2848	"""Prints a brief usage string and exits, optionally with an error message.
				2849
				2850	Args:
				2851	message: The optional error message.
				2852	"""
				2853	sys.stderr.write(_USAGE)
				2854	if message:
				2855	sys.exit('\nFATAL ERROR: ' + message)
				2856	else:
				2857	sys.exit(1)
				2858
				2859
				2860	def PrintCategories():
				2861	"""Prints a list of all the error-categories used by error messages.
				2862
				2863	These are the categories used to filter messages via --filter.
				2864	"""
				2865	sys.stderr.write(_ERROR_CATEGORIES)
				2866	sys.exit(0)
				2867
				2868
				2869	def ParseArguments(args):
				2870	"""Parses the command line arguments.
				2871
				2872	This may set the output format and verbosity level as side-effects.
				2873
				2874	Args:
				2875	args: The command line arguments:
				2876
				2877	Returns:
				2878	The list of filenames to lint.
				2879	"""
				2880	try:
				2881	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
				2882	'filter='])
				2883	except getopt.GetoptError:
				2884	PrintUsage('Invalid arguments.')
				2885
				2886	verbosity = _VerboseLevel()
				2887	output_format = _OutputFormat()
				2888	filters = ''
				2889
				2890	for (opt, val) in opts:
				2891	if opt == '--help':
				2892	PrintUsage(None)
				2893	elif opt == '--output':
				2894	if not val in ('emacs', 'vs7'):
				2895	PrintUsage('The only allowed output formats are emacs and vs7.')
				2896	output_format = val
				2897	elif opt == '--verbose':
				2898	verbosity = int(val)
				2899	elif opt == '--filter':
				2900	filters = val
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2901	if not filters:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2902	PrintCategories()
				2903
				2904	if not filenames:
				2905	PrintUsage('No files were specified.')
				2906
				2907	_SetOutputFormat(output_format)
				2908	_SetVerboseLevel(verbosity)
				2909	_SetFilters(filters)
				2910
				2911	return filenames
				2912
				2913
				2914	def main():
				2915	filenames = ParseArguments(sys.argv[1:])
				2916
				2917	# Change stderr to write with replacement characters so we don't die
				2918	# if we try to print something containing non-ASCII characters.
				2919	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				2920	codecs.getreader('utf8'),
				2921	codecs.getwriter('utf8'),
				2922	'replace')
				2923
				2924	_cpplint_state.ResetErrorCount()
				2925	for filename in filenames:
				2926	ProcessFile(filename, _cpplint_state.verbose_level)
				2927	sys.stderr.write('Total errors found: %d\n' % _cpplint_state.error_count)
				2928	sys.exit(_cpplint_state.error_count > 0)
				2929
				2930
# Run the linter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()