Blame - cpplint/cpplint.py - platform/external/google-styleguide

blob: 70912e28cb84ecb501d7c60dace428abae451be8 [file] [log] [blame]

erg@google.com	720121a	2012-05-11 16:31:47 +0000	[diff] [blame]	1	#!/usr/bin/python
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2	#
erg@google.com	8f91ab2	2011-09-06 21:04:45 +0000	[diff] [blame]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	8	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	18	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	30
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	31	"""Does google-lint on c++ files.
				32
				33	The goal of this script is to identify places in the code that may
				34	be in non-compliance with google style. It does not attempt to fix
				35	up these problems -- the point is to educate. It does also not
				36	attempt to find all problems, or to ensure that everything it does
				37	find is legitimately a problem.
				38
				39	In particular, we can get very confused by /* and // inside strings!
				40	We do a small hack, which is to ignore //'s with "'s after them on the
				41	same line, but it is far from perfect (in either direction).
				42	"""
				43
				44	import codecs
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	45	import copy
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	46	import getopt
				47	import math # for log
				48	import os
				49	import re
				50	import sre_compile
				51	import string
				52	import sys
				53	import unicodedata
				54
				55
				56	_USAGE = """
				57	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	58	[--counting=total\|toplevel\|detailed]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	59	<file> [file] ...
				60
				61	The style guidelines this tries to follow are those in
				62	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				63
				64	Every problem is given a confidence score from 1-5, with 5 meaning we are
				65	certain of the problem, and 1 meaning it could be a legitimate construct.
				66	This will miss some errors, and is not a substitute for a code review.
				67
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	68	To suppress false-positive errors of a certain category, add a
				69	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				70	suppresses errors of all categories on that line.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	71
				72	The files passed in will be linted; at least one file must be provided.
				73	Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
				74
				75	Flags:
				76
				77	output=vs7
				78	By default, the output is formatted to ease emacs parsing. Visual Studio
				79	compatible output (vs7) may also be used. Other formats are unsupported.
				80
				81	verbose=#
				82	Specify a number 0-5 to restrict errors to certain verbosity levels.
				83
				84	filter=-x,+y,...
				85	Specify a comma-separated list of category-filters to apply: only
				86	error messages whose category names pass the filters will be printed.
				87	(Category names are printed with the message and look like
				88	"[whitespace/indent]".) Filters are evaluated left to right.
				89	"-FOO" and "FOO" means "do not print categories that start with FOO".
				90	"+FOO" means "do print categories that start with FOO".
				91
				92	Examples: --filter=-whitespace,+whitespace/braces
				93	--filter=whitespace,runtime/printf,+runtime/printf_format
				94	--filter=-,+build/include_what_you_use
				95
				96	To see a list of all the categories used in cpplint, pass no arg:
				97	--filter=
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	98
				99	counting=total\|toplevel\|detailed
				100	The total number of errors found is always printed. If
				101	'toplevel' is provided, then the count of errors in each of
				102	the top-level categories like 'build' and 'whitespace' will
				103	also be printed. If 'detailed' is provided, then a count
				104	is provided for each category like 'build/class'.
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	105
				106	root=subdir
				107	The root directory used for deriving header guard CPP variable.
				108	By default, the header guard CPP variable is calculated as the relative
				109	path to the directory that contains .git, .hg, or .svn. When this flag
				110	is specified, the relative path is calculated from the specified
				111	directory. If the specified directory does not exist, this flag is
				112	ignored.
				113
				114	Examples:
				115	Assuing that src/.git exists, the header guard CPP variables for
				116	src/chrome/browser/ui/browser.h are:
				117
				118	No flag => CHROME_BROWSER_UI_BROWSER_H_
				119	--root=chrome => BROWSER_UI_BROWSER_H_
				120	--root=chrome/browser => UI_BROWSER_H_
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	121	"""
				122
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Each entry has the form '<top-level>/<detail>'.  ParseNolintSuppressions()
# checks NOLINT(category) comments against this list and reports any name
# that is not in it.
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/nul',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/vlog',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_conditional_body',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	190
# The default state of the category filter.  This is overridden by the
# --filter= flag: _CppLintState.SetFilters() starts from a copy of this list
# and appends the flag's filters after it.  By default all errors are on, so
# only add here categories that should be off by default (i.e., categories
# that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	196
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	197	# We used to check for high-bit characters, but after much discussion we
				198	# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	199	# hard-coded international strings, which belong in a separate i18n file.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	200
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	201
# Names of headers that are treated as C++ headers.  The names are kept as
# whitespace-separated words in adjacent string literals (every literal ends
# with a space so neighbours do not fuse) and split into a frozenset.
_CPP_HEADERS = frozenset((
    # Legacy
    'algobase.h algo.h alloc.h builtinbuf.h bvector.h complex.h defalloc.h '
    'deque.h editbuf.h fstream.h function.h hash_map hash_map.h hash_set '
    'hash_set.h hashtable.h heap.h indstream.h iomanip.h iostream.h '
    'istream.h iterator.h list.h map.h multimap.h multiset.h ostream.h '
    'pair.h parsestream.h pfstream.h procbuf.h pthread_alloc '
    'pthread_alloc.h rope rope.h ropeimpl.h set.h slist slist.h stack.h '
    'stdiostream.h stl_alloc.h stl_relops.h streambuf.h stream.h strfile.h '
    'strstream.h tempbuf.h tree.h type_traits.h vector.h '
    # 17.6.1.2 C++ library headers
    'algorithm array atomic bitset chrono codecvt complex '
    'condition_variable deque exception forward_list fstream functional '
    'future initializer_list iomanip ios iosfwd iostream istream iterator '
    'limits list locale map memory mutex new numeric ostream queue random '
    'ratio regex set sstream stack stdexcept streambuf string strstream '
    'system_error thread tuple typeindex typeinfo type_traits '
    'unordered_map unordered_set utility valarray vector '
    # 17.6.1.2 C++ headers for C library facilities
    'cassert ccomplex cctype cerrno cfenv cfloat cinttypes ciso646 climits '
    'clocale cmath csetjmp csignal cstdalign cstdarg cstdbool cstddef '
    'cstdint cstdio cstdlib cstring ctgmath ctime cuchar cwchar cwctype '
    ).split())
				337
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps macro name -> {comparison operator -> more specific macro to suggest}.
_CHECK_REPLACEMENT = dict((_name, {}) for _name in _CHECK_MACROS)

# Macros that assert a condition is true: the suggested macro is named after
# the operator itself.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for _macro in ('DCHECK', 'CHECK'):
    _CHECK_REPLACEMENT[_macro][op] = '%s_%s' % (_macro, replacement)
  for _prefix in ('EXPECT', 'ASSERT'):
    _CHECK_REPLACEMENT[_prefix + '_TRUE'][op] = (
        '%s_%s' % (_prefix, replacement))
    _CHECK_REPLACEMENT[_prefix + '_TRUE_M'][op] = (
        '%s_%s_M' % (_prefix, replacement))

# Macros that assert a condition is false: the suggested macro is named after
# the inverse operator.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for _prefix in ('EXPECT', 'ASSERT'):
    _CHECK_REPLACEMENT[_prefix + '_FALSE'][op] = (
        '%s_%s' % (_prefix, inv_replacement))
    _CHECK_REPLACEMENT[_prefix + '_FALSE_M'][op] = (
        '%s_%s_M' % (_prefix, inv_replacement))
				369
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
# Group 1 is the keyword; the lookahead requires a space, '(' or end of line
# after it so that identifiers merely starting with a keyword do not match.
#
# The keywords are joined with a bare '|' so the regex treats them as
# alternatives; an escaped '\|' would match a literal pipe character instead.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
				396
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	397
				398	# These constants define types of headers for use with
				399	# _IncludeState.CheckNextIncludeOrder().
				400	_C_SYS_HEADER = 1
				401	_CPP_SYS_HEADER = 2
				402	_LIKELY_MY_HEADER = 3
				403	_POSSIBLE_MY_HEADER = 4
				404	_OTHER_HEADER = 5
				405
# These constants define the current inline assembly state
_NO_ASM = 0  # Outside of inline assembly block
_INSIDE_ASM = 1  # Inside inline assembly block
_END_ASM = 2  # Last line of inline assembly block
_BLOCK_ASM = 3  # The whole block is an inline assembly block

# Match start of assembly blocks: one of the asm keyword spellings, an
# optional volatile qualifier (captured as group 1), then an opening '{' or
# '('.  The keyword spellings are separated by a bare '|' so they act as
# alternatives; an escaped '\|' would require a literal pipe in the input.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
				416
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	417
				418	_regexp_compile_cache = {}
				419
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	420	# Finds occurrences of NOLINT or NOLINT(...).
				421	_RE_SUPPRESSION = re.compile(r'\bNOLINT\b($[^)]*$)?')
				422
				423	# {str, set(int)}: a map from error categories to sets of linenumbers
				424	# on which those errors are expected and should be suppressed.
				425	_error_suppressions = {}
				426
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	427	# The root directory used for deriving header guard CPP variable.
				428	# This is set by --root flag.
				429	_root = None
				430
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records any NOLINT suppression found on one raw source line.

  Searches raw_line with _RE_SUPPRESSION.  A bare NOLINT or NOLINT(*) adds
  linenum to the "suppress everything" set stored under the key None in the
  global _error_suppressions map.  NOLINT(category) adds linenum to that
  category's set when the category is listed in _ERROR_CATEGORIES; otherwise
  a 'readability/nolint' error is reported through the error callback.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  nolint = _RE_SUPPRESSION.search(raw_line)
  if not nolint:
    return

  category = nolint.group(1)
  if category is None or category == '(*)':  # => "suppress all"
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  # Anything that is not wrapped in parentheses is silently ignored.
  if not (category.startswith('(') and category.endswith(')')):
    return

  category = category[1:-1]
  if category not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
    return
  _error_suppressions.setdefault(category, set()).add(linenum)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	458
				459
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global _error_suppressions map in place, so every reference to
  that dict sees the empty state.
  """
  _error_suppressions.clear()
				463
				464
				465	def IsErrorSuppressedByNolint(category, linenum):
				466	"""Returns true if the specified error category is suppressed on this line.
				467
				468	Consults the global error_suppressions map populated by
				469	ParseNolintSuppressions/ResetNolintSuppressions.
				470
				471	Args:
				472	category: str, the category of the error.
				473	linenum: int, the current line number.
				474	Returns:
				475	bool, True iff the error should be suppressed due to a NOLINT comment.
				476	"""
				477	return (linenum in _error_suppressions.get(category, set()) or
				478	linenum in _error_suppressions.get(None, set()))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	479
				480	def Match(pattern, s):
				481	"""Matches the string with the pattern, caching the compiled regexp."""
				482	# The regexp compilation caching is inlined in both Match and Search for
				483	# performance reasons; factoring it out into a separate function turns out
				484	# to be noticeably expensive.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	485	if pattern not in _regexp_compile_cache:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	486	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				487	return _regexp_compile_cache[pattern].match(s)
				488
				489
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the documented API; the sre_compile module is an
    # undocumented implementation detail that newer Pythons deprecate.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
				506
				507
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression; also the key in the shared
      _regexp_compile_cache.
    s: str, the text to scan.

  Returns:
    The match object for the first location where pattern matches in s, or
    None if there is no match.
  """
  if pattern not in _regexp_compile_cache:
    # re.compile is the documented API; the sre_compile module is an
    # undocumented implementation detail that newer Pythons deprecate.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
				513
				514
				515	class _IncludeState(dict):
				516	"""Tracks line numbers for includes, and the order in which includes appear.
				517
				518	As a dict, an _IncludeState object serves as a mapping between include
				519	filename and line number on which that file was included.
				520
				521	Call CheckNextIncludeOrder() once for each header in the file, passing
				522	in the type constants defined above. Calls in an illegal order will
				523	raise an _IncludeError with an appropriate error message.
				524
				525	"""
				526	# self._section will move monotonically through this set. If it ever
				527	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				528	_INITIAL_SECTION = 0
				529	_MY_H_SECTION = 1
				530	_C_SECTION = 2
				531	_CPP_SECTION = 3
				532	_OTHER_H_SECTION = 4
				533
				534	_TYPE_NAMES = {
				535	_C_SYS_HEADER: 'C system header',
				536	_CPP_SYS_HEADER: 'C++ system header',
				537	_LIKELY_MY_HEADER: 'header this file implements',
				538	_POSSIBLE_MY_HEADER: 'header this file may implement',
				539	_OTHER_HEADER: 'other header',
				540	}
				541	_SECTION_NAMES = {
				542	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				543	_MY_H_SECTION: 'a header this file implements',
				544	_C_SECTION: 'C system header',
				545	_CPP_SECTION: 'C++ system header',
				546	_OTHER_H_SECTION: 'other header',
				547	}
				548
				549	def __init__(self):
				550	dict.__init__(self)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	551	self.ResetSection()
				552
				553	def ResetSection(self):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	554	# The name of the current section.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	555	self._section = self._INITIAL_SECTION
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	556	# The path of last found header.
				557	self._last_header = ''
				558
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	559	def SetLastHeader(self, header_path):
				560	self._last_header = header_path
				561
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	562	def CanonicalizeAlphabeticalOrder(self, header_path):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	563	"""Returns a path canonicalized for alphabetical comparison.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	564
				565	- replaces "-" with "_" so they both cmp the same.
				566	- removes '-inl' since we don't require them to be after the main header.
				567	- lowercase everything, just in case.
				568
				569	Args:
				570	header_path: Path to be canonicalized.
				571
				572	Returns:
				573	Canonicalized path.
				574	"""
				575	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				576
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	577	def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	578	"""Check if a header is in alphabetical order with the previous header.
				579
				580	Args:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	581	clean_lines: A CleansedLines instance containing the file.
				582	linenum: The number of the line to check.
				583	header_path: Canonicalized header to be checked.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	584
				585	Returns:
				586	Returns true if the header is in alphabetical order.
				587	"""
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	588	# If previous section is different from current section, _last_header will
				589	# be reset to empty string, so it's always less than current header.
				590	#
				591	# If previous line was a blank line, assume that the headers are
				592	# intentionally sorted the way they are.
				593	if (self._last_header > header_path and
				594	not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	595	return False
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	596	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	597
				598	def CheckNextIncludeOrder(self, header_type):
				599	"""Returns a non-empty error message if the next header is out of order.
				600
				601	This function also updates the internal state to be ready to check
				602	the next include.
				603
				604	Args:
				605	header_type: One of the _XXX_HEADER constants defined above.
				606
				607	Returns:
				608	The empty string if the header is in the right order, or an
				609	error message describing what's wrong.
				610
				611	"""
				612	error_message = ('Found %s after %s' %
				613	(self._TYPE_NAMES[header_type],
				614	self._SECTION_NAMES[self._section]))
				615
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	616	last_section = self._section
				617
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	618	if header_type == _C_SYS_HEADER:
				619	if self._section <= self._C_SECTION:
				620	self._section = self._C_SECTION
				621	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	622	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	623	return error_message
				624	elif header_type == _CPP_SYS_HEADER:
				625	if self._section <= self._CPP_SECTION:
				626	self._section = self._CPP_SECTION
				627	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	628	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	629	return error_message
				630	elif header_type == _LIKELY_MY_HEADER:
				631	if self._section <= self._MY_H_SECTION:
				632	self._section = self._MY_H_SECTION
				633	else:
				634	self._section = self._OTHER_H_SECTION
				635	elif header_type == _POSSIBLE_MY_HEADER:
				636	if self._section <= self._MY_H_SECTION:
				637	self._section = self._MY_H_SECTION
				638	else:
				639	# This will always be the fallback because we're not sure
				640	# enough that the header is associated with this file.
				641	self._section = self._OTHER_H_SECTION
				642	else:
				643	assert header_type == _OTHER_HEADER
				644	self._section = self._OTHER_H_SECTION
				645
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	646	if last_section != self._section:
				647	self._last_header = ''
				648
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	649	return ''
				650
				651
				652	class _CppLintState(object):
				653	"""Maintains module-wide state.."""
				654
				655	def __init__(self):
				656	self.verbose_level = 1 # global setting.
				657	self.error_count = 0 # global count of reported errors
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	658	# filters to apply when emitting error messages
				659	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	660	self.counting = 'total' # In what way are we counting errors?
				661	self.errors_by_category = {} # string to int dict storing error counts
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	662
				663	# output format:
				664	# "emacs" - format that emacs can parse (default)
				665	# "vs7" - format that Microsoft Visual Studio 7 can parse
				666	self.output_format = 'emacs'
				667
  def SetOutputFormat(self, output_format):
    """Sets the output format for errors.

    Args:
      output_format: str, stored as-is in self.output_format; no validation
        is done here.  __init__ documents "emacs" and "vs7" as the formats.
    """
    self.output_format = output_format
				671
				672	def SetVerboseLevel(self, level):
				673	"""Sets the module's verbosity, and returns the previous setting."""
				674	last_verbose_level = self.verbose_level
				675	self.verbose_level = level
				676	return last_verbose_level
				677
  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options.

    Args:
      counting_style: str, stored as-is in self.counting.
        IncrementErrorCount only keeps per-category counts when it is
        'toplevel' or 'detailed'.
    """
    self.counting = counting_style
				681
def SetFilters(self, filters):
  """Sets the error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  The new filter list is assembled and validated before it is installed, so
  a rejected argument leaves the previously active filters untouched.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
      Each filter should start with + or -; else we die. Surrounding
      whitespace is stripped and empty entries are ignored.

  Raises:
    ValueError: The comma-separated filters did not all start with '+' or '-'.
      E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
  """
  # Default filters always have less priority than the flag ones, so they go
  # first and the flag filters are appended after them.
  candidate_filters = _DEFAULT_FILTERS[:]
  for filt in filters.split(','):
    clean_filt = filt.strip()
    if clean_filt:
      candidate_filters.append(clean_filt)

  for filt in candidate_filters:
    if not filt.startswith(('+', '-')):
      raise ValueError('Every filter in --filters must start with + or -'
                       ' (%s does not)' % filt)

  # Only commit once every entry has been checked.
  self.filters = candidate_filters
				706
def ResetErrorCounts(self):
  """Clears the running error total and the per-category tallies."""
  self.error_count, self.errors_by_category = 0, {}
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	711
def IncrementErrorCount(self, category):
  """Adds one error to the total and, if enabled, to its category tally.

  Args:
    category: The error category, possibly hierarchical with '/' separators
      (e.g. "whitespace/indent").
  """
  self.error_count += 1
  if self.counting not in ('toplevel', 'detailed'):
    return

  # 'toplevel' groups by the part before the first slash; 'detailed' keeps
  # the full category name.
  if self.counting == 'detailed':
    bucket = category
  else:
    bucket = category.split('/')[0]
  self.errors_by_category[bucket] = self.errors_by_category.get(bucket, 0) + 1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	721
def PrintErrorCounts(self):
  """Print a summary of errors by category, and the total.

  One line per entry of errors_by_category is written to stderr, followed by
  a line with the overall error count.
  """
  # items() rather than iteritems(): the latter only exists on Python 2
  # dictionaries, while items() works on both Python 2 and Python 3.
  for category, count in self.errors_by_category.items():
    sys.stderr.write('Category \'%s\' errors found: %d\n' %
                     (category, count))
  sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	728
# Single module-wide _CppLintState instance; the module-level helper functions
# below read and update the lint settings and error counts through it.
_cpplint_state = _CppLintState()
				730
				731
				732	def _OutputFormat():
				733	"""Gets the module's output format."""
				734	return _cpplint_state.output_format
				735
				736
				737	def _SetOutputFormat(output_format):
				738	"""Sets the module's output format."""
				739	_cpplint_state.SetOutputFormat(output_format)
				740
				741
				742	def _VerboseLevel():
				743	"""Returns the module's verbosity setting."""
				744	return _cpplint_state.verbose_level
				745
				746
				747	def _SetVerboseLevel(level):
				748	"""Sets the module's verbosity, and returns the previous setting."""
				749	return _cpplint_state.SetVerboseLevel(level)
				750
				751
def _SetCountingStyle(level):
  """Selects how the module tallies reported errors.

  Args:
    level: The counting style, forwarded unchanged to
      _CppLintState.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
				755
				756
def _Filters():
  """Returns the active output filters.

  Returns:
    The list object held by the module-wide state (not a copy).
  """
  return _cpplint_state.filters
				760
				761
				762	def _SetFilters(filters):
				763	"""Sets the module's error-message filters.
				764
				765	These filters are applied when deciding whether to emit a given
				766	error message.
				767
				768	Args:
				769	filters: A string of comma-separated filters (eg "whitespace/indent").
				770	Each filter should start with + or -; else we die.
				771	"""
				772	_cpplint_state.SetFilters(filters)
				773
				774
				775	class _FunctionState(object):
				776	"""Tracks current function name and the number of lines in its body."""
				777
				778	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				779	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				780
				781	def __init__(self):
				782	self.in_a_function = False
				783	self.lines_in_function = 0
				784	self.current_function = ''
				785
				786	def Begin(self, function_name):
				787	"""Start analyzing function body.
				788
				789	Args:
				790	function_name: The name of the function being tracked.
				791	"""
				792	self.in_a_function = True
				793	self.lines_in_function = 0
				794	self.current_function = function_name
				795
				796	def Count(self):
				797	"""Count line in current function body."""
				798	if self.in_a_function:
				799	self.lines_in_function += 1
				800
				801	def Check(self, error, filename, linenum):
				802	"""Report if too many lines in function body.
				803
				804	Args:
				805	error: The function to call with any errors found.
				806	filename: The name of the current file.
				807	linenum: The number of the line to check.
				808	"""
				809	if Match(r'T(EST\|est)', self.current_function):
				810	base_trigger = self._TEST_TRIGGER
				811	else:
				812	base_trigger = self._NORMAL_TRIGGER
				813	trigger = base_trigger * 2**_VerboseLevel()
				814
				815	if self.lines_in_function > trigger:
				816	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				817	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				818	if error_level > 5:
				819	error_level = 5
				820	error(filename, linenum, 'readability/fn_size', error_level,
				821	'Small and focused functions are preferred:'
				822	' %s has %d non-comment lines'
				823	' (error triggered by exceeding %d lines).' % (
				824	self.current_function, self.lines_in_function, trigger))
				825
				826	def End(self):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	827	"""Stop analyzing function body."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	828	self.in_a_function = False
				829
				830
				831	class _IncludeError(Exception):
				832	"""Indicates a problem with the include order in a file."""
				833	pass
				834
				835
				836	class FileInfo:
				837	"""Provides utility functions for filenames.
				838
				839	FileInfo provides easy access to the components of a file's path
				840	relative to the project root.
				841	"""
				842
				843	def __init__(self, filename):
				844	self._filename = filename
				845
				846	def FullName(self):
				847	"""Make Windows paths like Unix."""
				848	return os.path.abspath(self._filename).replace('\\', '/')
				849
				850	def RepositoryName(self):
				851	"""FullName after removing the local path to the repository.
				852
				853	If we have a real absolute path name here we can try to do something smart:
				854	detecting the root of the checkout and truncating /path/to/checkout from
				855	the name so that we get header guards that don't include things like
				856	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				857	people on different computers who have checked the source out to different
				858	locations won't see bogus errors.
				859	"""
				860	fullname = self.FullName()
				861
				862	if os.path.exists(fullname):
				863	project_dir = os.path.dirname(fullname)
				864
				865	if os.path.exists(os.path.join(project_dir, ".svn")):
				866	# If there's a .svn file in the current directory, we recursively look
				867	# up the directory tree for the top of the SVN checkout
				868	root_dir = project_dir
				869	one_up_dir = os.path.dirname(root_dir)
				870	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				871	root_dir = os.path.dirname(root_dir)
				872	one_up_dir = os.path.dirname(one_up_dir)
				873
				874	prefix = os.path.commonprefix([root_dir, project_dir])
				875	return fullname[len(prefix) + 1:]
				876
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	877	# Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
				878	# searching up from the current path.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	879	root_dir = os.path.dirname(fullname)
				880	while (root_dir != os.path.dirname(root_dir) and
erg@google.com	5e16969	2010-01-28 20:17:01 +0000	[diff] [blame]	881	not os.path.exists(os.path.join(root_dir, ".git")) and
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	882	not os.path.exists(os.path.join(root_dir, ".hg")) and
				883	not os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	884	root_dir = os.path.dirname(root_dir)
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	885
				886	if (os.path.exists(os.path.join(root_dir, ".git")) or
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	887	os.path.exists(os.path.join(root_dir, ".hg")) or
				888	os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	889	prefix = os.path.commonprefix([root_dir, project_dir])
				890	return fullname[len(prefix) + 1:]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	891
				892	# Don't know what to do; header guard warnings may be wrong...
				893	return fullname
				894
				895	def Split(self):
				896	"""Splits the file into the directory, basename, and extension.
				897
				898	For 'chrome/browser/browser.cc', Split() would
				899	return ('chrome/browser', 'browser', '.cc')
				900
				901	Returns:
				902	A tuple of (directory, basename, extension).
				903	"""
				904
				905	googlename = self.RepositoryName()
				906	project, rest = os.path.split(googlename)
				907	return (project,) + os.path.splitext(rest)
				908
				909	def BaseName(self):
				910	"""File base name - text after the final slash, before the final period."""
				911	return self.Split()[1]
				912
				913	def Extension(self):
				914	"""File extension - text following the final period."""
				915	return self.Split()[2]
				916
				917	def NoExtension(self):
				918	"""File has no source file extension."""
				919	return '/'.join(self.Split()[0:2])
				920
				921	def IsSource(self):
				922	"""File has a source file extension."""
				923	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				924
				925
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be reported.

  An error is dropped when a "NOLINT(category)" comment suppresses it on that
  line, when its confidence is below the current verbosity level, or when the
  last filter matching its category is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error refers to.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  suppressed = False
  for one_filter in _Filters():
    sign = one_filter[:1]
    if sign not in ('-', '+'):
      assert False  # should have been checked for in SetFilter.
      continue
    if category.startswith(one_filter[1:]):
      suppressed = (sign == '-')

  return not suppressed
				951
				952
				953	def Error(filename, linenum, category, confidence, message):
				954	"""Logs the fact we've found a lint error.
				955
				956	We log where the error was found, and also our confidence in the error,
				957	that is, how certain we are this is a legitimate style regression, and
				958	not a misidentification or a use that's sometimes justified.
				959
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	960	False positives can be suppressed by the use of
				961	"cpplint(category)" comments on the offending line. These are
				962	parsed into _error_suppressions.
				963
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	964	Args:
				965	filename: The name of the file containing the error.
				966	linenum: The number of the line containing the error.
				967	category: A string used to describe the "category" this bug
				968	falls under: "whitespace", say, or "runtime". Categories
				969	may have a hierarchy separated by slashes: "whitespace/indent".
				970	confidence: A number from 1-5 representing a confidence score for
				971	the error, with 5 meaning that we are certain of the problem,
				972	and 1 meaning that it could be a legitimate construct.
				973	message: The error message.
				974	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	975	if _ShouldPrintError(category, confidence, linenum):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	976	_cpplint_state.IncrementErrorCount(category)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	977	if _cpplint_state.output_format == 'vs7':
				978	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				979	filename, linenum, message, category, confidence))
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	980	elif _cpplint_state.output_format == 'eclipse':
				981	sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
				982	filename, linenum, message, category, confidence))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	983	else:
				984	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				985	filename, linenum, message, category, confidence))
				986
				987
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	988	# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	989	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				990	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				991	# Matches strings. Escape codes should already be removed by ESCAPES.
				992	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				993	# Matches characters. Escape codes should already be removed by ESCAPES.
				994	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				995	# Matches multi-line C++ comments.
				996	# This RE is a little bit more complicated than one might expect, because we
				997	# have to take care of space removals tools so we can handle comments inside
				998	# statements better.
				999	# The current rule is: We only clear spaces from both sides when we're at the
				1000	# end of the line. Otherwise, we try to remove spaces from the right side,
				1001	# if this doesn't work we try on left side but only if there's a non-character
				1002	# on the right.
				1003	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				1004	r"""(\s/\.\/\s*$\|
				1005	/\.\*/\s+\|
				1006	\s+/\.\*/(?=\W)\|
				1007	/\.\*/)""", re.VERBOSE)
				1008
				1009
				1010	def IsCppString(line):
				1011	"""Does line terminate so, that the next symbol is in string constant.
				1012
				1013	This function does not consider single-line nor multi-line comments.
				1014
				1015	Args:
				1016	line: is a partial line of code starting from the 0..n.
				1017
				1018	Returns:
				1019	True, if next character appended to 'line' is inside a
				1020	string constant.
				1021	"""
				1022
				1023	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				1024	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				1025
				1026
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings. The
    result has exactly one entry per input line.
  """

  # While inside a raw string this holds its closing sequence, i.e.
  # ')' + the string's own delimiter + '"'; otherwise it is None.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, append a blank line.
        line = ''

    else:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      # group(1): text before the literal, group(2): the (possibly empty)
      # delimiter between the quote and '(', group(3): rest of the line.
      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if matched:
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
				1086
				1087
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment spanning several lines.

  Args:
    lines: The list of lines to search.
    lineix: Index of the first line to look at.

  Returns:
    The index of the first line at or after lineix that starts with '/*'
    (ignoring surrounding whitespace) and has no '*/' after it, or
    len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Only report this marker if the comment goes beyond this line.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
				1097
				1098
				1099	def FindNextMultiLineCommentEnd(lines, lineix):
				1100	"""We are inside a comment, find the end marker."""
				1101	while lineix < len(lines):
				1102	if lines[lineix].strip().endswith('*/'):
				1103	return lineix
				1104	lineix += 1
				1105	return len(lines)
				1106
				1107
				1108	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				1109	"""Clears a range of lines for multi-line comments."""
				1110	# Having // dummy comments makes the lines non-empty, so we will not get
				1111	# unnecessary blank line warnings later in the code.
				1112	for i in range(begin, end):
				1113	lines[i] = '// dummy'
				1114
				1115
				1116	def RemoveMultiLineComments(filename, lines, error):
				1117	"""Removes multiline (c-style) comments from lines."""
				1118	lineix = 0
				1119	while lineix < len(lines):
				1120	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				1121	if lineix_begin >= len(lines):
				1122	return
				1123	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				1124	if lineix_end >= len(lines):
				1125	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				1126	'Could not find end of multi-line comment')
				1127	return
				1128	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				1129	lineix = lineix_end + 1
				1130
				1131
				1132	def CleanseComments(line):
				1133	"""Removes //-comments and single-line C-style /* */ comments.
				1134
				1135	Args:
				1136	line: A line of C++ source.
				1137
				1138	Returns:
				1139	The line with single-line comments removed.
				1140	"""
				1141	commentpos = line.find('//')
				1142	if commentpos != -1 and not IsCppString(line[:commentpos]):
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	1143	line = line[:commentpos].rstrip()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1144	# get rid of /* ... */
				1145	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				1146
				1147
class CleansedLines(object):
  """Holds several copies of all lines with different preprocessing applied.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member contains the lines with C++11 raw
     strings replaced by empty strings.
  All these members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for stripped_line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(stripped_line))
      collapsed = self._CollapseStrings(stripped_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    Strings are collapsed before comments are stripped by the caller, so text
    like '"http://"' is not mistaken for a comment. Lines matching
    _RE_PATTERN_INCLUDE are returned untouched.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
				1193
				1194
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() instead of xrange(): xrange only exists on Python 2, while
  # range works on both Python 2 and Python 3.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1217
				1218
def CloseExpression(clean_lines, linenum, pos):
  """Locates the closer matching the ( or { or [ or < at the given position.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[<':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}', '<': '>'}[startchar]

  # Scan the starting line first, then keep going line by line, carrying
  # the nesting depth across lines, until the closer shows up or the file
  # ends.
  (end_pos, depth) = FindEndOfExpressionInLine(
      line, pos, 0, startchar, endchar)
  while end_pos <= -1 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    (end_pos, depth) = FindEndOfExpressionInLine(
        line, 0, depth, startchar, endchar)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1263
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	1264
				1265	def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
				1266	"""Find position at the matching startchar.
				1267
				1268	This is almost the reverse of FindEndOfExpressionInLine, but note
				1269	that the input position and returned position differs by 1.
				1270
				1271	Args:
				1272	line: a CleansedLines line.
				1273	endpos: start searching at this position.
				1274	depth: nesting level at endpos.
				1275	startchar: expression opening character.
				1276	endchar: expression closing character.
				1277
				1278	Returns:
				1279	On finding matching startchar: (index at matching startchar, 0)
				1280	Otherwise: (-1, new depth at beginning of this line)
				1281	"""
				1282	for i in xrange(endpos, -1, -1):
				1283	if line[i] == endchar:
				1284	depth += 1
				1285	elif line[i] == startchar:
				1286	depth -= 1
				1287	if depth == 0:
				1288	return (i, 0)
				1289	return (-1, depth)
				1290
				1291
				1292	def ReverseCloseExpression(clean_lines, linenum, pos):
				1293	"""If input points to ) or } or ] or >, finds the position that opens it.
				1294
				1295	If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
				1296	linenum/pos that correspond to the opening of the expression.
				1297
				1298	Args:
				1299	clean_lines: A CleansedLines instance containing the file.
				1300	linenum: The number of the line to check.
				1301	pos: A position on the line.
				1302
				1303	Returns:
				1304	A tuple (line, linenum, pos) pointer at the opening brace, or
				1305	(line, 0, -1) if we never find the matching opening brace. Note
				1306	we ignore strings and comments when matching; and the line we
				1307	return is the 'cleansed' line at linenum.
				1308	"""
				1309	line = clean_lines.elided[linenum]
				1310	endchar = line[pos]
				1311	if endchar not in ')}]>':
				1312	return (line, 0, -1)
				1313	if endchar == ')': startchar = '('
				1314	if endchar == ']': startchar = '['
				1315	if endchar == '}': startchar = '{'
				1316	if endchar == '>': startchar = '<'
				1317
				1318	# Check last line
				1319	(start_pos, num_open) = FindStartOfExpressionInLine(
				1320	line, pos, 0, startchar, endchar)
				1321	if start_pos > -1:
				1322	return (line, linenum, start_pos)
				1323
				1324	# Continue scanning backward
				1325	while linenum > 0:
				1326	linenum -= 1
				1327	line = clean_lines.elided[linenum]
				1328	(start_pos, num_open) = FindStartOfExpressionInLine(
				1329	line, len(line) - 1, num_open, startchar, endchar)
				1330	if start_pos > -1:
				1331	return (line, linenum, start_pos)
				1332
				1333	# Did not find startchar before beginning of file, give up
				1334	return (line, 0, -1)
				1335
				1336
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file; index 0
      is a dummy line and is not inspected.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  # A slice replaces the former xrange() index loop, which only ran on
  # Python 2; the slice covers the same lines 1..10 and works on Python 3 too.
  if not any(re.search(r'Copyright', text, re.I) for text in lines[1:11]):
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
				1348
				1349
				1350	def GetHeaderGuardCPPVariable(filename):
				1351	"""Returns the CPP variable that should be used as a header guard.
				1352
				1353	Args:
				1354	filename: The name of a C++ header file.
				1355
				1356	Returns:
				1357	The CPP variable that should be used as a header guard in the
				1358	named file.
				1359
				1360	"""
				1361
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1362	# Restores original filename in case that cpplint is invoked from Emacs's
				1363	# flymake.
				1364	filename = re.sub(r'_flymake\.h$', '.h', filename)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1365	filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1366
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1367	fileinfo = FileInfo(filename)
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	1368	file_path_from_root = fileinfo.RepositoryName()
				1369	if _root:
				1370	file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
				1371	return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1372
				1373
				1374	def CheckForHeaderGuard(filename, lines, error):
				1375	"""Checks that the file contains a header guard.
				1376
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1377	Logs an error if no #ifndef header guard is present. For other
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1378	headers, checks that the full pathname is used.
				1379
				1380	Args:
				1381	filename: The name of the C++ header file.
				1382	lines: An array of strings, each representing a line of the file.
				1383	error: The function to call with any errors found.
				1384	"""
				1385
				1386	cppvar = GetHeaderGuardCPPVariable(filename)
				1387
				1388	ifndef = None
				1389	ifndef_linenum = 0
				1390	define = None
				1391	endif = None
				1392	endif_linenum = 0
				1393	for linenum, line in enumerate(lines):
				1394	linesplit = line.split()
				1395	if len(linesplit) >= 2:
				1396	# find the first occurrence of #ifndef and #define, save arg
				1397	if not ifndef and linesplit[0] == '#ifndef':
				1398	# set ifndef to the header guard presented on the #ifndef line.
				1399	ifndef = linesplit[1]
				1400	ifndef_linenum = linenum
				1401	if not define and linesplit[0] == '#define':
				1402	define = linesplit[1]
				1403	# find the last occurrence of #endif, save entire line
				1404	if line.startswith('#endif'):
				1405	endif = line
				1406	endif_linenum = linenum
				1407
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1408	if not ifndef:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1409	error(filename, 0, 'build/header_guard', 5,
				1410	'No #ifndef header guard found, suggested CPP variable is: %s' %
				1411	cppvar)
				1412	return
				1413
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1414	if not define:
				1415	error(filename, 0, 'build/header_guard', 5,
				1416	'No #define header guard found, suggested CPP variable is: %s' %
				1417	cppvar)
				1418	return
				1419
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1420	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				1421	# for backward compatibility.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1422	if ifndef != cppvar:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1423	error_level = 0
				1424	if ifndef != cppvar + '_':
				1425	error_level = 5
				1426
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1427	ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
				1428	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1429	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				1430	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				1431
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1432	if define != ifndef:
				1433	error(filename, 0, 'build/header_guard', 5,
				1434	'#ifndef and #define don\'t match, suggested CPP variable is: %s' %
				1435	cppvar)
				1436	return
				1437
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1438	if endif != ('#endif // %s' % cppvar):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1439	error_level = 0
				1440	if endif != ('#endif // %s' % (cppvar + '_')):
				1441	error_level = 5
				1442
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1443	ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
				1444	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1445	error(filename, endif_linenum, 'build/header_guard', error_level,
				1446	'#endif line should be "#endif // %s"' % cppvar)
				1447
				1448
def CheckForBadCharacters(filename, lines, error):
  """Logs an error for each line containing bad characters.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  for linenum, line in enumerate(lines):
    # The two checks are independent: a line holding both kinds of bad
    # character is reported twice, once per category.
    if u'\ufffd' in line:
      error(filename, linenum, 'readability/utf8', 5,
            'Line contains invalid UTF-8 (or Unicode replacement character).')
    if '\0' in line:
      error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1472
				1473
				1474	def CheckForNewlineAtEOF(filename, lines, error):
				1475	"""Logs an error if there is no newline char at the end of the file.
				1476
				1477	Args:
				1478	filename: The name of the current file.
				1479	lines: An array of strings, each representing a line of the file.
				1480	error: The function to call with any errors found.
				1481	"""
				1482
				1483	# The array lines() was created by adding two newlines to the
				1484	# original file (go figure), then splitting on \n.
				1485	# To verify that the file ends in \n, we just have to make sure the
				1486	# last-but-two element of lines() exists and is empty.
				1487	if len(lines) < 3 or lines[-2]:
				1488	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1489	'Could not find a newline character at the end of the file.')
				1490
				1491
				1492	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1493	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1494
				1495	/* ... */ comments are legit inside macros, for one line.
				1496	Otherwise, we prefer // comments, so it's ok to warn about the
				1497	other. Likewise, it's ok for strings to extend across multiple
				1498	lines, as long as a line continuation character (backslash)
				1499	terminates each line. Although not currently prohibited by the C++
				1500	style guide, it's ugly and unnecessary. We don't do well with either
				1501	in this lint program, so we warn about both.
				1502
				1503	Args:
				1504	filename: The name of the current file.
				1505	clean_lines: A CleansedLines instance containing the file.
				1506	linenum: The number of the line to check.
				1507	error: The function to call with any errors found.
				1508	"""
				1509	line = clean_lines.elided[linenum]
				1510
				1511	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1512	# second (escaped) slash may trigger later \" detection erroneously.
				1513	line = line.replace('\\\\', '')
				1514
				1515	if line.count('/') > line.count('/'):
				1516	error(filename, linenum, 'readability/multiline_comment', 5,
				1517	'Complex multi-line /.../-style comment found. '
				1518	'Lint may give bogus warnings. '
				1519	'Consider replacing these with //-style comments, '
				1520	'with #if 0...#endif, '
				1521	'or with more clearly structured multi-line comments.')
				1522
				1523	if (line.count('"') - line.count('\\"')) % 2:
				1524	error(filename, linenum, 'readability/multiline_string', 5,
				1525	'Multi-line string ("...") found. This lint script doesn\'t '
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	1526	'do well with such strings, and may give bogus warnings. '
				1527	'Use C++11 raw strings or concatenation instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1528
				1529
				1530	threading_list = (
				1531	('asctime(', 'asctime_r('),
				1532	('ctime(', 'ctime_r('),
				1533	('getgrgid(', 'getgrgid_r('),
				1534	('getgrnam(', 'getgrnam_r('),
				1535	('getlogin(', 'getlogin_r('),
				1536	('getpwnam(', 'getpwnam_r('),
				1537	('getpwuid(', 'getpwuid_r('),
				1538	('gmtime(', 'gmtime_r('),
				1539	('localtime(', 'localtime_r('),
				1540	('rand(', 'rand_r('),
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1541	('strtok(', 'strtok_r('),
				1542	('ttyname(', 'ttyname_r('),
				1543	)
				1544
				1545
				1546	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1547	"""Checks for calls to thread-unsafe functions.
				1548
				1549	Much code has been originally written without consideration of
				1550	multi-threading. Also, engineers are relying on their old experience;
				1551	they have learned posix before threading extensions were added. These
				1552	tests guide the engineers to use thread-safe functions (when using
				1553	posix directly).
				1554
				1555	Args:
				1556	filename: The name of the current file.
				1557	clean_lines: A CleansedLines instance containing the file.
				1558	linenum: The number of the line to check.
				1559	error: The function to call with any errors found.
				1560	"""
				1561	line = clean_lines.elided[linenum]
				1562	for single_thread_function, multithread_safe_function in threading_list:
				1563	ix = line.find(single_thread_function)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	1564	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1565	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1566	line[ix - 1] not in ('_', '.', '>'))):
				1567	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1568	'Consider using ' + multithread_safe_function +
				1569	'...) instead of ' + single_thread_function +
				1570	'...) for improved thread safety.')
				1571
				1572
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # Literal parentheses around an alternation of the symbolic severity names.
  if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
    error(filename, linenum, 'runtime/vlog', 5,
          'VLOG() should be used with numeric verbosity level.  '
          'Use LOG() if you want symbolic severity levels.')
				1590
				1591
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1592	# Matches invalid increment: *count++, which moves pointer instead of
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1593	# incrementing a value.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1594	_RE_PATTERN_INVALID_INCREMENT = re.compile(
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1595	r'^\s\\w+(\+\+\|--);')
				1596
				1597
				1598	def CheckInvalidIncrement(filename, clean_lines, linenum, error):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1599	"""Checks for invalid increment *count++.
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1600
				1601	For example following function:
				1602	void increment_counter(int* count) {
				1603	*count++;
				1604	}
				1605	is invalid, because it effectively does count++, moving pointer, and should
				1606	be replaced with ++count, (count)++ or *count += 1.
				1607
				1608	Args:
				1609	filename: The name of the current file.
				1610	clean_lines: A CleansedLines instance containing the file.
				1611	linenum: The number of the line to check.
				1612	error: The function to call with any errors found.
				1613	"""
				1614	line = clean_lines.elided[linenum]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1615	if _RE_PATTERN_INVALID_INCREMENT.match(line):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1616	error(filename, linenum, 'runtime/invalid_increment', 5,
				1617	'Changing pointer instead of value (or unused value of operator*).')
				1618
				1619
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1620	class _BlockInfo(object):
				1621	"""Stores information about a generic block of code."""
				1622
				1623	def __init__(self, seen_open_brace):
				1624	self.seen_open_brace = seen_open_brace
				1625	self.open_parentheses = 0
				1626	self.inline_asm = _NO_ASM
				1627
				1628	def CheckBegin(self, filename, clean_lines, linenum, error):
				1629	"""Run checks that applies to text up to the opening brace.
				1630
				1631	This is mostly for checking the text after the class identifier
				1632	and the "{", usually where the base class is specified. For other
				1633	blocks, there isn't much to check, so we always pass.
				1634
				1635	Args:
				1636	filename: The name of the current file.
				1637	clean_lines: A CleansedLines instance containing the file.
				1638	linenum: The number of the line to check.
				1639	error: The function to call with any errors found.
				1640	"""
				1641	pass
				1642
				1643	def CheckEnd(self, filename, clean_lines, linenum, error):
				1644	"""Run checks that applies to text after the closing brace.
				1645
				1646	This is mostly used for checking end of namespace comments.
				1647
				1648	Args:
				1649	filename: The name of the current file.
				1650	clean_lines: A CleansedLines instance containing the file.
				1651	linenum: The number of the line to check.
				1652	error: The function to call with any errors found.
				1653	"""
				1654	pass
				1655
				1656
				1657	class _ClassInfo(_BlockInfo):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1658	"""Stores information about a class."""
				1659
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1660	def __init__(self, name, class_or_struct, clean_lines, linenum):
				1661	_BlockInfo.__init__(self, False)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1662	self.name = name
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1663	self.starting_linenum = linenum
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1664	self.is_derived = False
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1665	if class_or_struct == 'struct':
				1666	self.access = 'public'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1667	self.is_struct = True
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1668	else:
				1669	self.access = 'private'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1670	self.is_struct = False
				1671
				1672	# Remember initial indentation level for this class. Using raw_lines here
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1673	# instead of elided to account for leading comments.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1674	initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
				1675	if initial_indent:
				1676	self.class_indent = len(initial_indent.group(1))
				1677	else:
				1678	self.class_indent = 0
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1679
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1680	# Try to find the end of the class. This will be confused by things like:
				1681	# class A {
				1682	# } *x = { ...
				1683	#
				1684	# But it's still good enough for CheckSectionSpacing.
				1685	self.last_line = 0
				1686	depth = 0
				1687	for i in range(linenum, clean_lines.NumLines()):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1688	line = clean_lines.elided[i]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1689	depth += line.count('{') - line.count('}')
				1690	if not depth:
				1691	self.last_line = i
				1692	break
				1693
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1694	def CheckBegin(self, filename, clean_lines, linenum, error):
				1695	# Look for a bare ':'
				1696	if Search('(^\|[^:]):($\|[^:])', clean_lines.elided[linenum]):
				1697	self.is_derived = True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1698
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1699	def CheckEnd(self, filename, clean_lines, linenum, error):
				1700	# Check that closing brace is aligned with beginning of the class.
				1701	# Only do this if the closing brace is indented by only whitespaces.
				1702	# This means we will not check single-line class definitions.
				1703	indent = Match(r'^( *)\}', clean_lines.elided[linenum])
				1704	if indent and len(indent.group(1)) != self.class_indent:
				1705	if self.is_struct:
				1706	parent = 'struct ' + self.name
				1707	else:
				1708	parent = 'class ' + self.name
				1709	error(filename, linenum, 'whitespace/indent', 3,
				1710	'Closing brace should be aligned with beginning of %s' % parent)
				1711
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1712
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records a namespace opened at linenum.

    Args:
      name: The namespace name; a false value (anonymous namespace) is
        stored as the empty string.
      linenum: The number of the line where the namespace starts.
    """
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace.  Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines.  However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    if (linenum - self.starting_linenum < 10
        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                    r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
				1765
				1766
				1767	class _PreprocessorInfo(object):
				1768	"""Stores checkpoints of nesting stacks when #if/#else is seen."""
				1769
				1770	def __init__(self, stack_before_if):
				1771	# The entire nesting stack before #if
				1772	self.stack_before_if = stack_before_if
				1773
				1774	# The entire nesting stack up to #else
				1775	self.stack_before_else = []
				1776
				1777	# Whether we have already seen #else or #elif
				1778	self.seen_else = False
				1779
				1780
				1781	class _NestingState(object):
				1782	"""Holds states related to parsing braces."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1783
				1784	def __init__(self):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1785	# Stack for tracking all braces. An object is pushed whenever we
				1786	# see a "{", and popped when we see a "}". Only 3 types of
				1787	# objects are possible:
				1788	# - _ClassInfo: a class or struct.
				1789	# - _NamespaceInfo: a namespace.
				1790	# - _BlockInfo: some other type of block.
				1791	self.stack = []
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1792
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1793	# Stack of _PreprocessorInfo objects.
				1794	self.pp_stack = []
				1795
				1796	def SeenOpenBrace(self):
				1797	"""Check if we have seen the opening brace for the innermost block.
				1798
				1799	Returns:
				1800	True if we have seen the opening brace, False if the innermost
				1801	block is still expecting an opening brace.
				1802	"""
				1803	return (not self.stack) or self.stack[-1].seen_open_brace
				1804
				1805	def InNamespaceBody(self):
				1806	"""Check if we are currently one level inside a namespace body.
				1807
				1808	Returns:
				1809	True if top of the stack is a namespace block, False otherwise.
				1810	"""
				1811	return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
				1812
				1813	def UpdatePreprocessor(self, line):
				1814	"""Update preprocessor stack.
				1815
				1816	We need to handle preprocessors due to classes like this:
				1817	#ifdef SWIG
				1818	struct ResultDetailsPageElementExtensionPoint {
				1819	#else
				1820	struct ResultDetailsPageElementExtensionPoint : public Extension {
				1821	#endif
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1822
				1823	We make the following assumptions (good enough for most files):
				1824	- Preprocessor condition evaluates to true from #if up to first
				1825	#else/#elif/#endif.
				1826
				1827	- Preprocessor condition evaluates to false from #else/#elif up
				1828	to #endif. We still perform lint checks on these lines, but
				1829	these do not affect nesting stack.
				1830
				1831	Args:
				1832	line: current line to check.
				1833	"""
				1834	if Match(r'^\s#\s(if\|ifdef\|ifndef)\b', line):
				1835	# Beginning of #if block, save the nesting stack here. The saved
				1836	# stack will allow us to restore the parsing state in the #else case.
				1837	self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
				1838	elif Match(r'^\s#\s(else\|elif)\b', line):
				1839	# Beginning of #else block
				1840	if self.pp_stack:
				1841	if not self.pp_stack[-1].seen_else:
				1842	# This is the first #else or #elif block. Remember the
				1843	# whole nesting stack up to this point. This is what we
				1844	# keep after the #endif.
				1845	self.pp_stack[-1].seen_else = True
				1846	self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
				1847
				1848	# Restore the stack to how it was before the #if
				1849	self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
				1850	else:
				1851	# TODO(unknown): unexpected #else, issue warning?
				1852	pass
				1853	elif Match(r'^\s#\sendif\b', line):
				1854	# End of #if or #else blocks.
				1855	if self.pp_stack:
				1856	# If we saw an #else, we will need to restore the nesting
				1857	# stack to its former state before the #else, otherwise we
				1858	# will just continue from where we left off.
				1859	if self.pp_stack[-1].seen_else:
				1860	# Here we can just use a shallow copy since we are the last
				1861	# reference to it.
				1862	self.stack = self.pp_stack[-1].stack_before_else
				1863	# Drop the corresponding #if
				1864	self.pp_stack.pop()
				1865	else:
				1866	# TODO(unknown): unexpected #endif, issue warning?
				1867	pass
				1868
				1869	def Update(self, filename, clean_lines, linenum, error):
				1870	"""Update nesting state with current line.
				1871
				1872	Args:
				1873	filename: The name of the current file.
				1874	clean_lines: A CleansedLines instance containing the file.
				1875	linenum: The number of the line to check.
				1876	error: The function to call with any errors found.
				1877	"""
				1878	line = clean_lines.elided[linenum]
				1879
				1880	# Update pp_stack first
				1881	self.UpdatePreprocessor(line)
				1882
				1883	# Count parentheses. This is to avoid adding struct arguments to
				1884	# the nesting stack.
				1885	if self.stack:
				1886	inner_block = self.stack[-1]
				1887	depth_change = line.count('(') - line.count(')')
				1888	inner_block.open_parentheses += depth_change
				1889
				1890	# Also check if we are starting or ending an inline assembly block.
				1891	if inner_block.inline_asm in (_NO_ASM, _END_ASM):
				1892	if (depth_change != 0 and
				1893	inner_block.open_parentheses == 1 and
				1894	_MATCH_ASM.match(line)):
				1895	# Enter assembly block
				1896	inner_block.inline_asm = _INSIDE_ASM
				1897	else:
				1898	# Not entering assembly block. If previous line was _END_ASM,
				1899	# we will now shift to _NO_ASM state.
				1900	inner_block.inline_asm = _NO_ASM
				1901	elif (inner_block.inline_asm == _INSIDE_ASM and
				1902	inner_block.open_parentheses == 0):
				1903	# Exit assembly block
				1904	inner_block.inline_asm = _END_ASM
				1905
				1906	# Consume namespace declaration at the beginning of the line. Do
				1907	# this in a loop so that we catch same line declarations like this:
				1908	# namespace proto2 { namespace bridge { class MessageSet; } }
				1909	while True:
				1910	# Match start of namespace. The "\b\s*" below catches namespace
				1911	# declarations even if it weren't followed by a whitespace, this
				1912	# is so that we don't confuse our namespace checker. The
				1913	# missing spaces will be flagged by CheckSpacing.
				1914	namespace_decl_match = Match(r'^\snamespace\b\s([:\w]+)?(.*)$', line)
				1915	if not namespace_decl_match:
				1916	break
				1917
				1918	new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
				1919	self.stack.append(new_namespace)
				1920
				1921	line = namespace_decl_match.group(2)
				1922	if line.find('{') != -1:
				1923	new_namespace.seen_open_brace = True
				1924	line = line[line.find('{') + 1:]
				1925
				1926	# Look for a class declaration in whatever is left of the line
				1927	# after parsing namespaces. The regexp accounts for decorated classes
				1928	# such as in:
				1929	# class LOCKABLE API Object {
				1930	# };
				1931	#
				1932	# Templates with class arguments may confuse the parser, for example:
				1933	# template <class T
				1934	# class Comparator = less<T>,
				1935	# class Vector = vector<T> >
				1936	# class HeapQueue {
				1937	#
				1938	# Because this parser has no nesting state about templates, by the
				1939	# time it saw "class Comparator", it may think that it's a new class.
				1940	# Nested templates have a similar problem:
				1941	# template <
				1942	# typename ExportedType,
				1943	# typename TupleType,
				1944	# template <typename, typename> class ImplTemplate>
				1945	#
				1946	# To avoid these cases, we ignore classes that are followed by '=' or '>'
				1947	class_decl_match = Match(
				1948	r'\s(template\s<[\w\s<>,:]>\s)?'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1949	r'(class\|struct)\s+([A-Z_]+\s+)(\w+(?:::\w+))'
				1950	r'(([^=>]\|<[^<>]>\|<[^<>]<[^<>]>\s>)*)$', line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1951	if (class_decl_match and
				1952	(not self.stack or self.stack[-1].open_parentheses == 0)):
				1953	self.stack.append(_ClassInfo(
				1954	class_decl_match.group(4), class_decl_match.group(2),
				1955	clean_lines, linenum))
				1956	line = class_decl_match.group(5)
				1957
				1958	# If we have not yet seen the opening brace for the innermost block,
				1959	# run checks here.
				1960	if not self.SeenOpenBrace():
				1961	self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
				1962
				1963	# Update access control if we are inside a class/struct
				1964	if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1965	classinfo = self.stack[-1]
				1966	access_match = Match(
				1967	r'^(.)\b(public\|private\|protected\|signals)(\s+(?:slots\s)?)?'
				1968	r':(?:[^:]\|$)',
				1969	line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1970	if access_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1971	classinfo.access = access_match.group(2)
				1972
				1973	# Check that access keywords are indented +1 space. Skip this
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1974	# check if the keywords are not preceded by whitespaces.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1975	indent = access_match.group(1)
				1976	if (len(indent) != classinfo.class_indent + 1 and
				1977	Match(r'^\s*$', indent)):
				1978	if classinfo.is_struct:
				1979	parent = 'struct ' + classinfo.name
				1980	else:
				1981	parent = 'class ' + classinfo.name
				1982	slots = ''
				1983	if access_match.group(3):
				1984	slots = access_match.group(3)
				1985	error(filename, linenum, 'whitespace/indent', 3,
				1986	'%s%s: should be indented +1 space inside %s' % (
				1987	access_match.group(2), slots, parent))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1988
				1989	# Consume braces or semicolons from what's left of the line
				1990	while True:
				1991	# Match first brace, semicolon, or closed parenthesis.
				1992	matched = Match(r'^[^{;)}]([{;)}])(.)$', line)
				1993	if not matched:
				1994	break
				1995
				1996	token = matched.group(1)
				1997	if token == '{':
				1998	# If namespace or class hasn't seen a opening brace yet, mark
				1999	# namespace/class head as complete. Push a new block onto the
				2000	# stack otherwise.
				2001	if not self.SeenOpenBrace():
				2002	self.stack[-1].seen_open_brace = True
				2003	else:
				2004	self.stack.append(_BlockInfo(True))
				2005	if _MATCH_ASM.match(line):
				2006	self.stack[-1].inline_asm = _BLOCK_ASM
				2007	elif token == ';' or token == ')':
				2008	# If we haven't seen an opening brace yet, but we already saw
				2009	# a semicolon, this is probably a forward declaration. Pop
				2010	# the stack for these.
				2011	#
				2012	# Similarly, if we haven't seen an opening brace yet, but we
				2013	# already saw a closing parenthesis, then these are probably
				2014	# function arguments with extra "class" or "struct" keywords.
				2015	# Also pop these stack for these.
				2016	if not self.SeenOpenBrace():
				2017	self.stack.pop()
				2018	else: # token == '}'
				2019	# Perform end of block checks and pop the stack.
				2020	if self.stack:
				2021	self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
				2022	self.stack.pop()
				2023	line = matched.group(2)
				2024
				2025	def InnermostClass(self):
				2026	"""Get class info on the top of the stack.
				2027
				2028	Returns:
				2029	A _ClassInfo object if we are inside a class, or None otherwise.
				2030	"""
				2031	for i in range(len(self.stack), 0, -1):
				2032	classinfo = self.stack[i - 1]
				2033	if isinstance(classinfo, _ClassInfo):
				2034	return classinfo
				2035	return None
				2036
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2037	def CheckCompletedBlocks(self, filename, error):
				2038	"""Checks that all classes and namespaces have been completely parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2039
				2040	Call this when all lines in a file have been processed.
				2041	Args:
				2042	filename: The name of the current file.
				2043	error: The function to call with any errors found.
				2044	"""
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2045	# Note: This test can result in false positives if #ifdef constructs
				2046	# get in the way of brace matching. See the testBuildClass test in
				2047	# cpplint_unittest.py for an example of this.
				2048	for obj in self.stack:
				2049	if isinstance(obj, _ClassInfo):
				2050	error(filename, obj.starting_linenum, 'build/class', 5,
				2051	'Failed to find complete declaration of class %s' %
				2052	obj.name)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2053	elif isinstance(obj, _NamespaceInfo):
				2054	error(filename, obj.starting_linenum, 'build/namespaces', 5,
				2055	'Failed to find complete declaration of namespace %s' %
				2056	obj.name)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2057
				2058
				2059	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2060	nesting_state, error):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2061	r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2062
				2063	Complain about several constructs which gcc-2 accepts, but which are
				2064	not standard C++. Warning about these in lint is one way to ease the
				2065	transition to new compilers.
				2066	- put storage class first (e.g. "static const" instead of "const static").
				2067	- "%lld" instead of %qd" in printf-type functions.
				2068	- "%1$d" is non-standard in printf-type functions.
				2069	- "\%" is an undefined character escape sequence.
				2070	- text after #endif is not allowed.
				2071	- invalid inner-style forward declaration.
				2072	- >? and <? operators, and their >?= and <?= cousins.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2073
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2074	Additionally, check for constructor/destructor style violations and reference
				2075	members, as it is very convenient to do so while checking for
				2076	gcc-2 compliance.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2077
				2078	Args:
				2079	filename: The name of the current file.
				2080	clean_lines: A CleansedLines instance containing the file.
				2081	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2082	nesting_state: A _NestingState instance which maintains information about
				2083	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2084	error: A callable to which errors are reported, which takes 4 arguments:
				2085	filename, line number, error level, and message
				2086	"""
				2087
				2088	# Remove comments from the line, but leave in strings for now.
				2089	line = clean_lines.lines[linenum]
				2090
				2091	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				2092	error(filename, linenum, 'runtime/printf_format', 3,
				2093	'%q in format strings is deprecated. Use %ll instead.')
				2094
				2095	if Search(r'printf\s\(.".*%\d+\$', line):
				2096	error(filename, linenum, 'runtime/printf_format', 2,
				2097	'%N$ formats are unconventional. Try rewriting to avoid them.')
				2098
				2099	# Remove escaped backslashes before looking for undefined escapes.
				2100	line = line.replace('\\\\', '')
				2101
				2102	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				2103	error(filename, linenum, 'build/printf_format', 3,
				2104	'%, [, (, and { are undefined character escapes. Unescape them.')
				2105
				2106	# For the rest, work with both comments and strings removed.
				2107	line = clean_lines.elided[linenum]
				2108
				2109	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				2110	r'\|float\|double\|signed\|unsigned'
				2111	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2112	r'\s+(register\|static\|extern\|typedef)\b',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2113	line):
				2114	error(filename, linenum, 'build/storage_class', 5,
				2115	'Storage class (static, extern, typedef, etc) should be first.')
				2116
				2117	if Match(r'\s#\sendif\s*[^/\s]+', line):
				2118	error(filename, linenum, 'build/endif_comment', 5,
				2119	'Uncommented text after #endif is non-standard. Use a comment.')
				2120
				2121	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				2122	error(filename, linenum, 'build/forward_decl', 5,
				2123	'Inner-style forward declarations are invalid. Remove this line.')
				2124
				2125	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				2126	line):
				2127	error(filename, linenum, 'build/deprecated', 3,
				2128	'>? and <? (max and min) operators are non-standard and deprecated.')
				2129
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2130	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				2131	# TODO(unknown): Could it be expanded safely to arbitrary references,
				2132	# without triggering too many false positives? The first
				2133	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				2134	# the restriction.
				2135	# Here's the original regexp, for the reference:
				2136	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				2137	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				2138	error(filename, linenum, 'runtime/member_string_references', 2,
				2139	'const string& members are dangerous. It is much better to use '
				2140	'alternatives, such as pointers or simple constants.')
				2141
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2142	# Everything else in this function operates on class declarations.
				2143	# Return early if the top of the nesting stack is not a class, or if
				2144	# the class head is not completed yet.
				2145	classinfo = nesting_state.InnermostClass()
				2146	if not classinfo or not classinfo.seen_open_brace:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2147	return
				2148
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2149	# The class may have been declared with namespace or classname qualifiers.
				2150	# The constructor and destructor will not have those qualifiers.
				2151	base_classname = classinfo.name.split('::')[-1]
				2152
				2153	# Look for single-argument constructors that aren't marked explicit.
				2154	# Technically a valid construct, but against style.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2155	args = Match(r'\s+(?:inline\s+)?%s\s*$([^,()]+)$'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2156	% re.escape(base_classname),
				2157	line)
				2158	if (args and
				2159	args.group(1) != 'void' and
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	2160	not Match(r'(const\s+)?%s(\s+const)?\s(?:<\w+>\s)?&'
				2161	% re.escape(base_classname), args.group(1).strip())):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2162	error(filename, linenum, 'runtime/explicit', 5,
				2163	'Single-argument constructors should be marked explicit.')
				2164
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2165
				2166	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				2167	"""Checks for the correctness of various spacing around function calls.
				2168
				2169	Args:
				2170	filename: The name of the current file.
				2171	line: The text of the line to check.
				2172	linenum: The number of the line to check.
				2173	error: The function to call with any errors found.
				2174	"""
				2175
				2176	# Since function calls often occur inside if/for/while/switch
				2177	# expressions - which have their own, more liberal conventions - we
				2178	# first see if we should be looking inside such an expression for a
				2179	# function call, to which we can apply more strict standards.
				2180	fncall = line # if there's no control flow construct, look at whole line
				2181	for pattern in (r'\bif\s$(.)$\s*{',
				2182	r'\bfor\s$(.)$\s*{',
				2183	r'\bwhile\s$(.)$\s*[{;]',
				2184	r'\bswitch\s$(.)$\s*{'):
				2185	match = Search(pattern, line)
				2186	if match:
				2187	fncall = match.group(1) # look inside the parens for function calls
				2188	break
				2189
				2190	# Except in if/for/while/switch, there should never be space
				2191	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				2192	# for nested parens ( (a+b) + c ). Likewise, there should never be
				2193	# a space before a ( when it's a function argument. I assume it's a
				2194	# function argument when the char before the whitespace is legal in
				2195	# a function name (alnum + _) and we're not starting a macro. Also ignore
				2196	# pointers and references to arrays and functions coz they're too tricky:
				2197	# we use a very simple way to recognize these:
				2198	# " (something)(maybe-something)" or
				2199	# " (something)(maybe-something," or
				2200	# " (something)[something]"
				2201	# Note that we assume the contents of [] to be short enough that
				2202	# they'll never need to wrap.
				2203	if ( # Ignore control structures.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2204	not Search(r'\b(if\|for\|while\|switch\|return\|new\|delete\|catch\|sizeof)\b',
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2205	fncall) and
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2206	# Ignore pointers/references to functions.
				2207	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				2208	# Ignore pointers/references to arrays.
				2209	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2210	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2211	error(filename, linenum, 'whitespace/parens', 4,
				2212	'Extra space after ( in function call')
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2213	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2214	error(filename, linenum, 'whitespace/parens', 2,
				2215	'Extra space after (')
				2216	if (Search(r'\w\s+\(', fncall) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2217	not Search(r'#\s*define\|typedef', fncall) and
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2218	not Search(r'\w\s+$(\w+::)\\w+$\(', fncall)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2219	error(filename, linenum, 'whitespace/parens', 4,
				2220	'Extra space before ( in function call')
				2221	# If the ) is followed only by a newline or a { + newline, assume it's
				2222	# part of a control statement (if/while/etc), and don't complain
				2223	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2224	# If the closing parenthesis is preceded by only whitespaces,
				2225	# try to give a more descriptive error message.
				2226	if Search(r'^\s+\)', fncall):
				2227	error(filename, linenum, 'whitespace/parens', 2,
				2228	'Closing ) should be moved to the previous line')
				2229	else:
				2230	error(filename, linenum, 'whitespace/parens', 2,
				2231	'Extra space before )')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2232
				2233
				2234	def IsBlankLine(line):
				2235	"""Returns true if the given line is blank.
				2236
				2237	We consider a line to be blank if the line is empty or consists of
				2238	only white spaces.
				2239
				2240	Args:
				2241	line: A line of a string.
				2242
				2243	Returns:
				2244	True, if the given line is blank.
				2245	"""
				2246	return not line or line.isspace()
				2247
				2248
				2249	def CheckForFunctionLengths(filename, clean_lines, linenum,
				2250	function_state, error):
				2251	"""Reports for long function bodies.
				2252
				2253	For an overview why this is done, see:
				2254	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				2255
				2256	Uses a simplistic algorithm assuming other style guidelines
				2257	(especially spacing) are followed.
				2258	Only checks unindented functions, so class members are unchecked.
				2259	Trivial bodies are unchecked, so constructors with huge initializer lists
				2260	may be missed.
				2261	Blank/comment lines are not counted so as to avoid encouraging the removal
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2262	of vertical space and comments just to get through a lint check.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2263	NOLINT on the last line of a function disables this check.
				2264
				2265	Args:
				2266	filename: The name of the current file.
				2267	clean_lines: A CleansedLines instance containing the file.
				2268	linenum: The number of the line to check.
				2269	function_state: Current function name and lines in body so far.
				2270	error: The function to call with any errors found.
				2271	"""
				2272	lines = clean_lines.lines
				2273	line = lines[linenum]
				2274	raw = clean_lines.raw_lines
				2275	raw_line = raw[linenum]
				2276	joined_line = ''
				2277
				2278	starting_func = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2279	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2280	match_result = Match(regexp, line)
				2281	if match_result:
				2282	# If the name is all caps and underscores, figure it's a macro and
				2283	# ignore it, unless it's TEST or TEST_F.
				2284	function_name = match_result.group(1).split()[-1]
				2285	if function_name == 'TEST' or function_name == 'TEST_F' or (
				2286	not Match(r'[A-Z_]+$', function_name)):
				2287	starting_func = True
				2288
				2289	if starting_func:
				2290	body_found = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2291	for start_linenum in xrange(linenum, clean_lines.NumLines()):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2292	start_line = lines[start_linenum]
				2293	joined_line += ' ' + start_line.lstrip()
				2294	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				2295	body_found = True
				2296	break # ... ignore
				2297	elif Search(r'{', start_line):
				2298	body_found = True
				2299	function = Search(r'((\w\|:)*)\(', line).group(1)
				2300	if Match(r'TEST', function): # Handle TEST... macros
				2301	parameter_regexp = Search(r'($.*$)', joined_line)
				2302	if parameter_regexp: # Ignore bad syntax
				2303	function += parameter_regexp.group(1)
				2304	else:
				2305	function += '()'
				2306	function_state.Begin(function)
				2307	break
				2308	if not body_found:
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2309	# No body for the function (or evidence of a non-function) was found.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2310	error(filename, linenum, 'readability/fn_size', 5,
				2311	'Lint failed to find start of function body.')
				2312	elif Match(r'^\}\s*$', line): # function end
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2313	function_state.Check(error, filename, linenum)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2314	function_state.End()
				2315	elif not Match(r'^\s*$', line):
				2316	function_state.Count() # Count non-blank/non-comment lines.
				2317
				2318
				2319	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				2320
				2321
				2322	def CheckComment(comment, filename, linenum, error):
				2323	"""Checks for common mistakes in TODO comments.
				2324
				2325	Args:
				2326	comment: The text of the comment from the line in question.
				2327	filename: The name of the current file.
				2328	linenum: The number of the line to check.
				2329	error: The function to call with any errors found.
				2330	"""
				2331	match = _RE_PATTERN_TODO.match(comment)
				2332	if match:
				2333	# One whitespace is correct; zero whitespace is handled elsewhere.
				2334	leading_whitespace = match.group(1)
				2335	if len(leading_whitespace) > 1:
				2336	error(filename, linenum, 'whitespace/todo', 2,
				2337	'Too many spaces before TODO')
				2338
				2339	username = match.group(2)
				2340	if not username:
				2341	error(filename, linenum, 'readability/todo', 2,
				2342	'Missing username in TODO; it should look like '
				2343	'"// TODO(my_username): Stuff."')
				2344
				2345	middle_whitespace = match.group(3)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2346	# Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2347	if middle_whitespace != ' ' and middle_whitespace != '':
				2348	error(filename, linenum, 'whitespace/todo', 2,
				2349	'TODO(my_username) should be followed by a space')
				2350
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Group 1 captures the macro name so it can be quoted in the message.
  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return
  if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
    if nesting_state.stack[-1].access != 'private':
      error(filename, linenum, 'readability/constructors', 3,
            '%s must be in the private: section' % matched.group(1))

  else:
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration. We could issue a warning here, but it
    # probably resulted in a compiler error already.
    pass
				2380
				2381
				2382	def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
				2383	"""Find the corresponding > to close a template.
				2384
				2385	Args:
				2386	clean_lines: A CleansedLines instance containing the file.
				2387	linenum: Current line number.
				2388	init_suffix: Remainder of the current line after the initial <.
				2389
				2390	Returns:
				2391	True if a matching bracket exists.
				2392	"""
				2393	line = init_suffix
				2394	nesting_stack = ['<']
				2395	while True:
				2396	# Find the next operator that can tell us whether < is used as an
				2397	# opening bracket or as a less-than operator. We only want to
				2398	# warn on the latter case.
				2399	#
				2400	# We could also check all other operators and terminate the search
				2401	# early, e.g. if we got something like this "a<b+c", the "<" is
				2402	# most likely a less-than operator, but then we will get false
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2403	# positives for default arguments and other template expressions.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2404	match = Search(r'^[^<>(),;\[\]]([<>(),;\[\]])(.)$', line)
				2405	if match:
				2406	# Found an operator, update nesting stack
				2407	operator = match.group(1)
				2408	line = match.group(2)
				2409
				2410	if nesting_stack[-1] == '<':
				2411	# Expecting closing angle bracket
				2412	if operator in ('<', '(', '['):
				2413	nesting_stack.append(operator)
				2414	elif operator == '>':
				2415	nesting_stack.pop()
				2416	if not nesting_stack:
				2417	# Found matching angle bracket
				2418	return True
				2419	elif operator == ',':
				2420	# Got a comma after a bracket, this is most likely a template
				2421	# argument. We have not seen a closing angle bracket yet, but
				2422	# it's probably a few lines later if we look for it, so just
				2423	# return early here.
				2424	return True
				2425	else:
				2426	# Got some other operator.
				2427	return False
				2428
				2429	else:
				2430	# Expecting closing parenthesis or closing bracket
				2431	if operator in ('<', '(', '['):
				2432	nesting_stack.append(operator)
				2433	elif operator in (')', ']'):
				2434	# We don't bother checking for matching () or []. If we got
				2435	# something like (] or [), it would have been a syntax error.
				2436	nesting_stack.pop()
				2437
				2438	else:
				2439	# Scan the next line
				2440	linenum += 1
				2441	if linenum >= len(clean_lines.elided):
				2442	break
				2443	line = clean_lines.elided[linenum]
				2444
				2445	# Exhausted all remaining lines and still no matching angle bracket.
				2446	# Most likely the input was incomplete, otherwise we should have
				2447	# seen a semicolon and returned early.
				2448	return True
				2449
				2450
				2451	def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
				2452	"""Find the corresponding < that started a template.
				2453
				2454	Args:
				2455	clean_lines: A CleansedLines instance containing the file.
				2456	linenum: Current line number.
				2457	init_prefix: Part of the current line before the initial >.
				2458
				2459	Returns:
				2460	True if a matching bracket exists.
				2461	"""
				2462	line = init_prefix
				2463	nesting_stack = ['>']
				2464	while True:
				2465	# Find the previous operator
				2466	match = Search(r'^(.)([<>(),;\[\]])[^<>(),;\[\]]$', line)
				2467	if match:
				2468	# Found an operator, update nesting stack
				2469	operator = match.group(2)
				2470	line = match.group(1)
				2471
				2472	if nesting_stack[-1] == '>':
				2473	# Expecting opening angle bracket
				2474	if operator in ('>', ')', ']'):
				2475	nesting_stack.append(operator)
				2476	elif operator == '<':
				2477	nesting_stack.pop()
				2478	if not nesting_stack:
				2479	# Found matching angle bracket
				2480	return True
				2481	elif operator == ',':
				2482	# Got a comma before a bracket, this is most likely a
				2483	# template argument. The opening angle bracket is probably
				2484	# there if we look for it, so just return early here.
				2485	return True
				2486	else:
				2487	# Got some other operator.
				2488	return False
				2489
				2490	else:
				2491	# Expecting opening parenthesis or opening bracket
				2492	if operator in ('>', ')', ']'):
				2493	nesting_stack.append(operator)
				2494	elif operator in ('(', '['):
				2495	nesting_stack.pop()
				2496
				2497	else:
				2498	# Scan the previous line
				2499	linenum -= 1
				2500	if linenum < 0:
				2501	break
				2502	line = clean_lines.elided[linenum]
				2503
				2504	# Exhausted all earlier lines and still no matching angle bracket.
				2505	return False
				2506
				2507
				2508	def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2509	"""Checks for the correctness of various spacing issues in the code.
				2510
				2511	Things we check for: spaces around operators, spaces after
				2512	if/for/while/switch, no spaces around parens in function calls, two
				2513	spaces between code and comment, don't start a block with a blank
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2514	line, don't end a function with a blank line, don't add a blank line
				2515	after public/protected/private, don't have too many blank lines in a row.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2516
				2517	Args:
				2518	filename: The name of the current file.
				2519	clean_lines: A CleansedLines instance containing the file.
				2520	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2521	nesting_state: A _NestingState instance which maintains information about
				2522	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2523	error: The function to call with any errors found.
				2524	"""
				2525
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2526	# Don't use "elided" lines here, otherwise we can't check commented lines.
				2527	# Don't want to use "raw" either, because we don't want to check inside C++11
				2528	# raw strings,
				2529	raw = clean_lines.lines_without_raw_strings
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2530	line = raw[linenum]
				2531
				2532	# Before nixing comments, check if the line is blank for no good
				2533	# reason. This includes the first line after a block is opened, and
				2534	# blank lines at the end of a function (ie, right before a line like '}'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2535	#
				2536	# Skip all the blank line checks if we are immediately inside a
				2537	# namespace body. In other words, don't issue blank line warnings
				2538	# for this block:
				2539	# namespace {
				2540	#
				2541	# }
				2542	#
				2543	# A warning about missing end of namespace comments will be issued instead.
				2544	if IsBlankLine(line) and not nesting_state.InNamespaceBody():
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2545	elided = clean_lines.elided
				2546	prev_line = elided[linenum - 1]
				2547	prevbrace = prev_line.rfind('{')
				2548	# TODO(unknown): Don't complain if line before blank line, and line after,
				2549	# both start with alnums and are indented the same amount.
				2550	# This ignores whitespace at the start of a namespace block
				2551	# because those are not usually indented.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2552	if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2553	# OK, we have a blank line at the start of a code block. Before we
				2554	# complain, we check if it is an exception to the rule: The previous
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2555	# non-empty line has the parameters of a function header that are indented
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2556	# 4 spaces (because they did not fit in a 80 column line when placed on
				2557	# the same line as the function name). We also check for the case where
				2558	# the previous line is indented 6 spaces, which may happen when the
				2559	# initializers of a constructor do not fit into a 80 column line.
				2560	exception = False
				2561	if Match(r' {6}\w', prev_line): # Initializer list?
				2562	# We are looking for the opening column of initializer list, which
				2563	# should be indented 4 spaces to cause 6 space indentation afterwards.
				2564	search_position = linenum-2
				2565	while (search_position >= 0
				2566	and Match(r' {6}\w', elided[search_position])):
				2567	search_position -= 1
				2568	exception = (search_position >= 0
				2569	and elided[search_position][:5] == ' :')
				2570	else:
				2571	# Search for the function arguments or an initializer list. We use a
				2572	# simple heuristic here: If the line is indented 4 spaces; and we have a
				2573	# closing paren, without the opening paren, followed by an opening brace
				2574	# or colon (for initializer lists) we assume that it is the last line of
				2575	# a function header. If we have a colon indented 4 spaces, it is an
				2576	# initializer list.
				2577	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				2578	prev_line)
				2579	or Match(r' {4}:', prev_line))
				2580
				2581	if not exception:
				2582	error(filename, linenum, 'whitespace/blank_line', 2,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2583	'Redundant blank line at the start of a code block '
				2584	'should be deleted.')
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2585	# Ignore blank lines at the end of a block in a long if-else
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2586	# chain, like this:
				2587	# if (condition1) {
				2588	# // Something followed by a blank line
				2589	#
				2590	# } else if (condition2) {
				2591	# // Something else
				2592	# }
				2593	if linenum + 1 < clean_lines.NumLines():
				2594	next_line = raw[linenum + 1]
				2595	if (next_line
				2596	and Match(r'\s*}', next_line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2597	and next_line.find('} else ') == -1):
				2598	error(filename, linenum, 'whitespace/blank_line', 3,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2599	'Redundant blank line at the end of a code block '
				2600	'should be deleted.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2601
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2602	matched = Match(r'\s*(public\|protected\|private):', prev_line)
				2603	if matched:
				2604	error(filename, linenum, 'whitespace/blank_line', 3,
				2605	'Do not leave a blank line after "%s:"' % matched.group(1))
				2606
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2607	# Next, we complain if there's a comment too near the text
				2608	commentpos = line.find('//')
				2609	if commentpos != -1:
				2610	# Check if the // may be in quotes. If so, ignore it
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2611	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2612	if (line.count('"', 0, commentpos) -
				2613	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				2614	# Allow one space for new scopes, two spaces otherwise:
				2615	if (not Match(r'^\s*{ //', line) and
				2616	((commentpos >= 1 and
				2617	line[commentpos-1] not in string.whitespace) or
				2618	(commentpos >= 2 and
				2619	line[commentpos-2] not in string.whitespace))):
				2620	error(filename, linenum, 'whitespace/comments', 2,
				2621	'At least two spaces is best between code and comments')
				2622	# There should always be a space between the // and the comment
				2623	commentend = commentpos + 2
				2624	if commentend < len(line) and not line[commentend] == ' ':
				2625	# but some lines are exceptions -- e.g. if they're big
				2626	# comment delimiters like:
				2627	# //----------------------------------------------------------
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2628	# or are an empty C++ style Doxygen comment, like:
				2629	# ///
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2630	# or they begin with multiple slashes followed by a space:
				2631	# //////// Header comment
				2632	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2633	Search(r'^/$', line[commentend:]) or
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2634	Search(r'^/+ ', line[commentend:]))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2635	if not match:
				2636	error(filename, linenum, 'whitespace/comments', 4,
				2637	'Should have a space between // and comment')
				2638	CheckComment(line[commentpos:], filename, linenum, error)
				2639
				2640	line = clean_lines.elided[linenum] # get rid of comments and strings
				2641
				2642	# Don't try to do spacing checks for operator methods
				2643	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				2644
				2645	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				2646	# Otherwise not. Note we only check for non-spaces on both sides;
				2647	# sometimes people put non-spaces on one side when aligning ='s among
				2648	# many lines (not that this is behavior that I approve of...)
				2649	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				2650	error(filename, linenum, 'whitespace/operators', 4,
				2651	'Missing spaces around =')
				2652
				2653	# It's ok not to have spaces around binary operators like + - * /, but if
				2654	# there's too little whitespace, we get concerned. It's hard to tell,
				2655	# though, so we punt on this one for now. TODO.
				2656
				2657	# You should always have whitespace around binary operators.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2658	#
				2659	# Check <= and >= first to avoid false positives with < and >, then
				2660	# check non-include lines for spacing around < and >.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2661	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2662	if match:
				2663	error(filename, linenum, 'whitespace/operators', 3,
				2664	'Missing spaces around %s' % match.group(1))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2665	# We allow no-spaces around << when used like this: 10<<20, but
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2666	# not otherwise (particularly, not when used as streams)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2667	match = Search(r'(\S)(?:L\|UL\|ULL\|l\|ul\|ull)?<<(\S)', line)
				2668	if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
				2669	error(filename, linenum, 'whitespace/operators', 3,
				2670	'Missing spaces around <<')
				2671	elif not Match(r'#.*include', line):
				2672	# Avoid false positives on ->
				2673	reduced_line = line.replace('->', '')
				2674
				2675	# Look for < that is not surrounded by spaces. This is only
				2676	# triggered if both sides are missing spaces, even though
				2677	# technically should should flag if at least one side is missing a
				2678	# space. This is done to avoid some false positives with shifts.
				2679	match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
				2680	if (match and
				2681	not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
				2682	error(filename, linenum, 'whitespace/operators', 3,
				2683	'Missing spaces around <')
				2684
				2685	# Look for > that is not surrounded by spaces. Similar to the
				2686	# above, we only trigger if both sides are missing spaces to avoid
				2687	# false positives with shifts.
				2688	match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
				2689	if (match and
				2690	not FindPreviousMatchingAngleBracket(clean_lines, linenum,
				2691	match.group(1))):
				2692	error(filename, linenum, 'whitespace/operators', 3,
				2693	'Missing spaces around >')
				2694
				2695	# We allow no-spaces around >> for almost anything. This is because
				2696	# C++11 allows ">>" to close nested templates, which accounts for
				2697	# most cases when ">>" is not followed by a space.
				2698	#
				2699	# We still warn on ">>" followed by alpha character, because that is
				2700	# likely due to ">>" being used for right shifts, e.g.:
				2701	# value >> alpha
				2702	#
				2703	# When ">>" is used to close templates, the alphanumeric letter that
				2704	# follows would be part of an identifier, and there should still be
				2705	# a space separating the template type and the identifier.
				2706	# type<type<type>> alpha
				2707	match = Search(r'>>[a-zA-Z_]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2708	if match:
				2709	error(filename, linenum, 'whitespace/operators', 3,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2710	'Missing spaces around >>')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2711
				2712	# There shouldn't be space around unary operators
				2713	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				2714	if match:
				2715	error(filename, linenum, 'whitespace/operators', 4,
				2716	'Extra space for operator %s' % match.group(1))
				2717
				2718	# A pet peeve of mine: no spaces after an if, while, switch, or for
				2719	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				2720	if match:
				2721	error(filename, linenum, 'whitespace/parens', 5,
				2722	'Missing space before ( in %s' % match.group(1))
				2723
				2724	# For if/for/while/switch, the left and right parens should be
				2725	# consistent about how many spaces are inside the parens, and
				2726	# there should either be zero or one spaces inside the parens.
				2727	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2728	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2729	match = Search(r'\b(if\|for\|while\|switch)\s*'
				2730	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				2731	line)
				2732	if match:
				2733	if len(match.group(2)) != len(match.group(4)):
				2734	if not (match.group(3) == ';' and
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2735	len(match.group(2)) == 1 + len(match.group(4)) or
				2736	not match.group(2) and Search(r'\bfor\s$.; $', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2737	error(filename, linenum, 'whitespace/parens', 5,
				2738	'Mismatching spaces inside () in %s' % match.group(1))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2739	if len(match.group(2)) not in [0, 1]:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2740	error(filename, linenum, 'whitespace/parens', 5,
				2741	'Should have zero or one spaces inside ( and ) in %s' %
				2742	match.group(1))
				2743
				2744	# You should always have a space after a comma (either as fn arg or operator)
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2745	#
				2746	# This does not apply when the non-space character following the
				2747	# comma is another comma, since the only time when that happens is
				2748	# for empty macro arguments.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2749	#
				2750	# We run this check in two passes: first pass on elided lines to
				2751	# verify that lines contain missing whitespaces, second pass on raw
				2752	# lines to confirm that those missing whitespaces are not due to
				2753	# elided comments.
				2754	if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2755	error(filename, linenum, 'whitespace/comma', 3,
				2756	'Missing space after ,')
				2757
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	2758	# You should always have a space after a semicolon
				2759	# except for few corner cases
				2760	# TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
				2761	# space after ;
				2762	if Search(r';[^\s};\\)/]', line):
				2763	error(filename, linenum, 'whitespace/semicolon', 3,
				2764	'Missing space after ;')
				2765
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2766	# Next we will look for issues with function calls.
				2767	CheckSpacingForFunctionCall(filename, line, linenum, error)
				2768
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2769	# Except after an opening paren, or after another opening brace (in case of
				2770	# an initializer list, for instance), you should have spaces before your
				2771	# braces. And since you should never have braces at the beginning of a line,
				2772	# this is an easy test.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2773	match = Match(r'^(.*[^ ({]){', line)
				2774	if match:
				2775	# Try a bit harder to check for brace initialization. This
				2776	# happens in one of the following forms:
				2777	# Constructor() : initializer_list_{} { ... }
				2778	# Constructor{}.MemberFunction()
				2779	# Type variable{};
				2780	# FunctionCall(type{}, ...);
				2781	# LastArgument(..., type{});
				2782	# LOG(INFO) << type{} << " ...";
				2783	# map_of_type[{...}] = ...;
				2784	#
				2785	# We check for the character following the closing brace, and
				2786	# silence the warning if it's one of those listed above, i.e.
				2787	# "{.;,)<]".
				2788	#
				2789	# To account for nested initializer list, we allow any number of
				2790	# closing braces up to "{;,)<". We can't simply silence the
				2791	# warning on first sight of closing brace, because that would
				2792	# cause false negatives for things that are not initializer lists.
				2793	# Silence this: But not this:
				2794	# Outer{ if (...) {
				2795	# Inner{...} if (...){ // Missing space before {
				2796	# }; }
				2797	#
				2798	# There is a false negative with this approach if people inserted
				2799	# spurious semicolons, e.g. "if (cond){};", but we will catch the
				2800	# spurious semicolon with a separate check.
				2801	(endline, endlinenum, endpos) = CloseExpression(
				2802	clean_lines, linenum, len(match.group(1)))
				2803	trailing_text = ''
				2804	if endpos > -1:
				2805	trailing_text = endline[endpos:]
				2806	for offset in xrange(endlinenum + 1,
				2807	min(endlinenum + 3, clean_lines.NumLines() - 1)):
				2808	trailing_text += clean_lines.elided[offset]
				2809	if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
				2810	error(filename, linenum, 'whitespace/braces', 5,
				2811	'Missing space before {')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2812
				2813	# Make sure '} else {' has spaces.
				2814	if Search(r'}else', line):
				2815	error(filename, linenum, 'whitespace/braces', 5,
				2816	'Missing space before else')
				2817
				2818	# You shouldn't have spaces before your brackets, except maybe after
				2819	# 'delete []' or 'new char * []'.
				2820	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				2821	error(filename, linenum, 'whitespace/braces', 5,
				2822	'Extra space before [')
				2823
				2824	# You shouldn't have a space before a semicolon at the end of the line.
				2825	# There's a special case for "for" since the style guide allows space before
				2826	# the semicolon there.
				2827	if Search(r':\s;\s$', line):
				2828	error(filename, linenum, 'whitespace/semicolon', 5,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2829	'Semicolon defining empty statement. Use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2830	elif Search(r'^\s;\s$', line):
				2831	error(filename, linenum, 'whitespace/semicolon', 5,
				2832	'Line contains only semicolon. If this should be an empty statement, '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2833	'use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2834	elif (Search(r'\s+;\s*$', line) and
				2835	not Search(r'\bfor\b', line)):
				2836	error(filename, linenum, 'whitespace/semicolon', 5,
				2837	'Extra space before last semicolon. If this should be an empty '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2838	'statement, use {} instead.')
				2839
				2840	# In range-based for, we wanted spaces before and after the colon, but
				2841	# not around "::" tokens that might appear.
				2842	if (Search('for \(.[^:]:[^: ]', line) or
				2843	Search('for \(.[^: ]:[^:]', line)):
				2844	error(filename, linenum, 'whitespace/forcolon', 2,
				2845	'Missing space around colon in range-based for loop')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2846
				2847
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for a missing blank line before an access specifier.

  The only thing checked here is that a "public:", "protected:" or
  "private:" label is preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.starting_linenum <= 24 or
      linenum <= class_info.starting_linenum):
    return

  # Alternation must use a bare "|"; an escaped "\|" would only match a
  # literal pipe character and the check would never fire.
  matched = Match(r'\s*(public|protected|private):',
                  clean_lines.lines[linenum])
  if not matched:
    return

  # Issue a warning if the line before public/protected/private was not
  # a blank line, but don't do this if the previous line contains
  # "class" or "struct".  This can happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # Also ignore cases where the previous line ends with a backslash, as
  # can be common when defining classes in C macros.
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  end_class_head = class_info.starting_linenum
  for i in range(class_info.starting_linenum, linenum):
    if Search(r'\{\s*$', clean_lines.lines[i]):
      end_class_head = i
      break

  # Only complain when the label is not directly below the class head.
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
				2901
				2902
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line above the given line.

  The search walks upwards through clean_lines.elided, starting at the
  line just before linenum, and stops at the first line for which
  IsBlankLine() is false.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (text, index) tuple: the contents of the nearest preceding
    non-blank line and its line number.  If every preceding line is
    blank (or there is no preceding line), returns ('', -1).
  """
  candidate = linenum
  while candidate > 0:
    candidate -= 1
    text = clean_lines.elided[candidate]
    if IsBlankLine(text):
      continue
    return (text, candidate)

  # Ran off the top of the file without finding any content.
  return ('', -1)
				2924
				2925
				2926	def CheckBraces(filename, clean_lines, linenum, error):
				2927	"""Looks for misplaced braces (e.g. at the end of line).
				2928
				2929	Args:
				2930	filename: The name of the current file.
				2931	clean_lines: A CleansedLines instance containing the file.
				2932	linenum: The number of the line to check.
				2933	error: The function to call with any errors found.
				2934	"""
				2935
				2936	line = clean_lines.elided[linenum] # get rid of comments and strings
				2937
				2938	if Match(r'\s{\s$', line):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2939	# We allow an open brace to start a line in the case where someone is using
				2940	# braces in a block to explicitly create a new scope, which is commonly used
				2941	# to control the lifetime of stack-allocated variables. Braces are also
				2942	# used for brace initializers inside function calls. We don't detect this
				2943	# perfectly: we just don't complain if the last non-whitespace character on
				2944	# the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2945	# previous line starts a preprocessor block.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2946	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2947	if (not Search(r'[,;:}{(]\s*$', prevline) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2948	not Match(r'\s*#', prevline)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2949	error(filename, linenum, 'whitespace/braces', 4,
				2950	'{ should almost always be at the end of the previous line')
				2951
				2952	# An else clause should be on the same line as the preceding closing brace.
				2953	if Match(r'\selse\s', line):
				2954	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2955	if Match(r'\s}\s$', prevline):
				2956	error(filename, linenum, 'whitespace/newline', 4,
				2957	'An else should appear on the same line as the preceding }')
				2958
				2959	# If braces come on one side of an else, they should be on both.
				2960	# However, we have to worry about "else if" that spans multiple lines!
				2961	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				2962	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				2963	# find the ( after the if
				2964	pos = line.find('else if')
				2965	pos = line.find('(', pos)
				2966	if pos > 0:
				2967	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				2968	if endline[endpos:].find('{') == -1: # must be brace after if
				2969	error(filename, linenum, 'readability/braces', 5,
				2970	'If an else has a brace on one side, it should have it on both')
				2971	else: # common case: else not followed by a multi-line if
				2972	error(filename, linenum, 'readability/braces', 5,
				2973	'If an else has a brace on one side, it should have it on both')
				2974
				2975	# Likewise, an else should never have the else clause on the same line
				2976	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				2977	error(filename, linenum, 'whitespace/newline', 4,
				2978	'Else clause should never be on same line as else (use 2 lines)')
				2979
				2980	# In the same way, a do/while should never be on one line
				2981	if Match(r'\s*do [^\s{]', line):
				2982	error(filename, linenum, 'whitespace/newline', 4,
				2983	'do/while clauses should not be on a single line')
				2984
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	2985	# Block bodies should not be followed by a semicolon. Due to C++11
				2986	# brace initialization, there are more places where semicolons are
				2987	# required than not, so we use a whitelist approach to check these
				2988	# rather than a blacklist. These are the places where "};" should
				2989	# be replaced by just "}":
				2990	# 1. Some flavor of block following closing parenthesis:
				2991	# for (;;) {};
				2992	# while (...) {};
				2993	# switch (...) {};
				2994	# Function(...) {};
				2995	# if (...) {};
				2996	# if (...) else if (...) {};
				2997	#
				2998	# 2. else block:
				2999	# if (...) else {};
				3000	#
				3001	# 3. const member function:
				3002	# Function(...) const {};
				3003	#
				3004	# 4. Block following some statement:
				3005	# x = 42;
				3006	# {};
				3007	#
				3008	# 5. Block at the beginning of a function:
				3009	# Function(...) {
				3010	# {};
				3011	# }
				3012	#
				3013	# Note that naively checking for the preceding "{" will also match
				3014	# braces inside multi-dimensional arrays, but this is fine since
				3015	# that expression will not contain semicolons.
				3016	#
				3017	# 6. Block following another block:
				3018	# while (true) {}
				3019	# {};
				3020	#
				3021	# 7. End of namespaces:
				3022	# namespace {};
				3023	#
				3024	# These semicolons seems far more common than other kinds of
				3025	# redundant semicolons, possibly due to people converting classes
				3026	# to namespaces. For now we do not warn for this case.
				3027	#
				3028	# Try matching case 1 first.
				3029	match = Match(r'^(.\)\s)\{', line)
				3030	if match:
				3031	# Matched closing parenthesis (case 1). Check the token before the
				3032	# matching opening parenthesis, and don't warn if it looks like a
				3033	# macro. This avoids these false positives:
				3034	# - macro that defines a base class
				3035	# - multi-line macro that defines a base class
				3036	# - macro that defines the whole class-head
				3037	#
				3038	# But we still issue warnings for macros that we know are safe to
				3039	# warn, specifically:
				3040	# - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
				3041	# - TYPED_TEST
				3042	# - INTERFACE_DEF
				3043	# - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
				3044	#
				3045	# We implement a whitelist of safe macros instead of a blacklist of
				3046	# unsafe macros, even though the latter appears less frequently in
				3047	# google code and would have been easier to implement. This is because
				3048	# the downside for getting the whitelist wrong means some extra
				3049	# semicolons, while the downside for getting the blacklist wrong
				3050	# would result in compile errors.
				3051	#
				3052	# In addition to macros, we also don't want to warn on compound
				3053	# literals.
				3054	closing_brace_pos = match.group(1).rfind(')')
				3055	opening_parenthesis = ReverseCloseExpression(
				3056	clean_lines, linenum, closing_brace_pos)
				3057	if opening_parenthesis[2] > -1:
				3058	line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
				3059	macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
				3060	if ((macro and
				3061	macro.group(1) not in (
				3062	'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
				3063	'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
				3064	'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
				3065	Search(r'\s+=\s*$', line_prefix)):
				3066	match = None
				3067
				3068	else:
				3069	# Try matching cases 2-3.
				3070	match = Match(r'^(.(?:else\|\)\sconst)\s*)\{', line)
				3071	if not match:
				3072	# Try matching cases 4-6. These are always matched on separate lines.
				3073	#
				3074	# Note that we can't simply concatenate the previous line to the
				3075	# current line and do a single match, otherwise we may output
				3076	# duplicate warnings for the blank line case:
				3077	# if (cond) {
				3078	# // blank line
				3079	# }
				3080	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				3081	if prevline and Search(r'[;{}]\s*$', prevline):
				3082	match = Match(r'^(\s*)\{', line)
				3083
				3084	# Check matching closing brace
				3085	if match:
				3086	(endline, endlinenum, endpos) = CloseExpression(
				3087	clean_lines, linenum, len(match.group(1)))
				3088	if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
				3089	# Current {} pair is eligible for semicolon check, and we have found
				3090	# the redundant semicolon, output warning here.
				3091	#
				3092	# Note: because we are scanning forward for opening braces, and
				3093	# outputting warnings for the matching closing brace, if there are
				3094	# nested blocks with trailing semicolons, we will get the error
				3095	# messages in reversed order.
				3096	error(filename, endlinenum, 'readability/braces', 4,
				3097	"You don't need a ; after a }")
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3098
				3099
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  A warning is reported on the line where the parenthesized condition
  ends when the character immediately after the closing parenthesis is
  a semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Search for loop keywords at the beginning of the line.  Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return

  # Find the end of the conditional expression
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Output warning if what follows the condition expression is a semicolon.
  # No warning for all other cases, including whitespace or newline, since we
  # have a separate check for semicolons preceded by whitespace.
  if end_pos >= 0 and Match(r';', end_line[end_pos:]):
    if matched.group(1) == 'if':
      error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
            'Empty conditional bodies should use {}')
    else:
      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
            'Empty loop bodies should use {} or continue')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3133
				3134
				3135	def CheckCheck(filename, clean_lines, linenum, error):
				3136	"""Checks the use of CHECK and EXPECT macros.
				3137
				3138	Args:
				3139	filename: The name of the current file.
				3140	clean_lines: A CleansedLines instance containing the file.
				3141	linenum: The number of the line to check.
				3142	error: The function to call with any errors found.
				3143	"""
				3144
				3145	# Decide the set of replacement macros that should be suggested
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3146	lines = clean_lines.elided
				3147	check_macro = None
				3148	start_pos = -1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3149	for macro in _CHECK_MACROS:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3150	i = lines[linenum].find(macro)
				3151	if i >= 0:
				3152	check_macro = macro
				3153
				3154	# Find opening parenthesis. Do a regular expression match here
				3155	# to make sure that we are matching the expected CHECK macro, as
				3156	# opposed to some other macro that happens to contain the CHECK
				3157	# substring.
				3158	matched = Match(r'^(.\b' + check_macro + r'\s)\(', lines[linenum])
				3159	if not matched:
				3160	continue
				3161	start_pos = len(matched.group(1))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3162	break
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3163	if not check_macro or start_pos < 0:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3164	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				3165	return
				3166
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3167	# Find end of the boolean expression by matching parentheses
				3168	(last_line, end_line, end_pos) = CloseExpression(
				3169	clean_lines, linenum, start_pos)
				3170	if end_pos < 0:
				3171	return
				3172	if linenum == end_line:
				3173	expression = lines[linenum][start_pos + 1:end_pos - 1]
				3174	else:
				3175	expression = lines[linenum][start_pos + 1:]
				3176	for i in xrange(linenum + 1, end_line):
				3177	expression += lines[i]
				3178	expression += last_line[0:end_pos - 1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3179
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3180	# Parse expression so that we can take parentheses into account.
				3181	# This avoids false positives for inputs like "CHECK((a < 4) == b)",
				3182	# which is not replaceable by CHECK_LE.
				3183	lhs = ''
				3184	rhs = ''
				3185	operator = None
				3186	while expression:
				3187	matched = Match(r'^\s(<<\|<<=\|>>\|>>=\|->\\|->\|&&\|\\|\\|\|'
				3188	r'==\|!=\|>=\|>\|<=\|<\|\()(.*)$', expression)
				3189	if matched:
				3190	token = matched.group(1)
				3191	if token == '(':
				3192	# Parenthesized operand
				3193	expression = matched.group(2)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3194	(end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3195	if end < 0:
				3196	return # Unmatched parenthesis
				3197	lhs += '(' + expression[0:end]
				3198	expression = expression[end:]
				3199	elif token in ('&&', '\|\|'):
				3200	# Logical and/or operators. This means the expression
				3201	# contains more than one term, for example:
				3202	# CHECK(42 < a && a < b);
				3203	#
				3204	# These are not replaceable with CHECK_LE, so bail out early.
				3205	return
				3206	elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
				3207	# Non-relational operator
				3208	lhs += token
				3209	expression = matched.group(2)
				3210	else:
				3211	# Relational operator
				3212	operator = token
				3213	rhs = matched.group(2)
				3214	break
				3215	else:
				3216	# Unparenthesized operand. Instead of appending to lhs one character
				3217	# at a time, we do another regular expression match to consume several
				3218	# characters at once if possible. Trivial benchmark shows that this
				3219	# is more efficient when the operands are longer than a single
				3220	# character, which is generally the case.
				3221	matched = Match(r'^([^-=!<>()&\|]+)(.*)$', expression)
				3222	if not matched:
				3223	matched = Match(r'^(\s\S)(.)$', expression)
				3224	if not matched:
				3225	break
				3226	lhs += matched.group(1)
				3227	expression = matched.group(2)
				3228
				3229	# Only apply checks if we got all parts of the boolean expression
				3230	if not (lhs and operator and rhs):
				3231	return
				3232
				3233	# Check that rhs do not contain logical operators. We already know
				3234	# that lhs is fine since the loop above parses out && and \|\|.
				3235	if rhs.find('&&') > -1 or rhs.find('\|\|') > -1:
				3236	return
				3237
				3238	# At least one of the operands must be a constant literal. This is
				3239	# to avoid suggesting replacements for unprintable things like
				3240	# CHECK(variable != iterator)
				3241	#
				3242	# The following pattern matches decimal, hex integers, strings, and
				3243	# characters (in that order).
				3244	lhs = lhs.strip()
				3245	rhs = rhs.strip()
				3246	match_constant = r'^([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')$'
				3247	if Match(match_constant, lhs) or Match(match_constant, rhs):
				3248	# Note: since we know both lhs and rhs, we can provide a more
				3249	# descriptive error message like:
				3250	# Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
				3251	# Instead of:
				3252	# Consider using CHECK_EQ instead of CHECK(a == b)
				3253	#
				3254	# We are still keeping the less descriptive message because if lhs
				3255	# or rhs gets long, the error message might become unreadable.
				3256	error(filename, linenum, 'readability/check', 2,
				3257	'Consider using %s instead of %s(a %s b)' % (
				3258	_CHECK_REPLACEMENT[check_macro][operator],
				3259	check_macro, operator))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3260
				3261
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Every token on the line that matches _ALT_TOKEN_REPLACEMENT_PATTERN is
  reported together with the operator _ALT_TOKEN_REPLACEMENT maps it to.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives.  At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # Only the comment delimiters themselves are looked for: testing for a
  # bare '/' would also skip every line containing a division or a path.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
				3292
				3293
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Anything that is not a
    Unicode string is measured with len().
  """
  # type(u'') is 'unicode' on Python 2 and 'str' on Python 3, so this check
  # works on both without referring to the Python 2-only 'unicode' builtin.
  if not isinstance(line, type(u'')):
    return len(line)

  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining marks are drawn on the previous character: zero columns.
      width += 1
  return width
				3314
				3315
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  # Number of leading blanks (spaces only; tabs are reported above).
  initial_spaces = len(line) - len(line.lstrip(' '))
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section labels
  # such as " public:".  The pattern must allow *optional* whitespace around
  # the label, otherwise the exemption never applies.
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        # Tolerate either one or two spaces before the trailing comment.
        line.startswith('#endif // %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 0,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3419
				3420
				3421	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				3422	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				3423	# Matches the first component of a filename delimited by -s and _s. That is:
				3424	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				3425	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				3426	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				3427	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				3428	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				3429
				3430
				3431	def _DropCommonSuffixes(filename):
				3432	"""Drops common suffixes like _test.cc or -inl.h from filename.
				3433
				3434	For example:
				3435	>>> _DropCommonSuffixes('foo/foo-inl.h')
				3436	'foo/foo'
				3437	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				3438	'foo/bar/foo'
				3439	>>> _DropCommonSuffixes('foo/foo_internal.h')
				3440	'foo/foo'
				3441	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				3442	'foo/foo_unusualinternal'
				3443
				3444	Args:
				3445	filename: The input filename.
				3446
				3447	Returns:
				3448	The filename with the common suffix removed.
				3449	"""
				3450	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				3451	'inl.h', 'impl.h', 'internal.h'):
				3452	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				3453	filename[-len(suffix) - 1] in ('-', '_')):
				3454	return filename[:-len(suffix) - 1]
				3455	return os.path.splitext(filename)[0]
				3456
				3457
				3458	def _IsTestFilename(filename):
				3459	"""Determines if the given filename has a suffix that identifies it as a test.
				3460
				3461	Args:
				3462	filename: The input filename.
				3463
				3464	Returns:
				3465	True if 'filename' looks like a test, False otherwise.
				3466	"""
				3467	if (filename.endswith('_test.cc') or
				3468	filename.endswith('_unittest.cc') or
				3469	filename.endswith('_regtest.cc')):
				3470	return True
				3471	else:
				3472	return False
				3473
				3474
				3475	def _ClassifyInclude(fileinfo, include, is_system):
				3476	"""Figures out what kind of header 'include' is.
				3477
				3478	Args:
				3479	fileinfo: The current file cpplint is running over. A FileInfo instance.
				3480	include: The path to a #included file.
				3481	is_system: True if the #include used <> rather than "".
				3482
				3483	Returns:
				3484	One of the _XXX_HEADER constants.
				3485
				3486	For example:
				3487	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				3488	_C_SYS_HEADER
				3489	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				3490	_CPP_SYS_HEADER
				3491	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				3492	_LIKELY_MY_HEADER
				3493	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				3494	... 'bar/foo_other_ext.h', False)
				3495	_POSSIBLE_MY_HEADER
				3496	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				3497	_OTHER_HEADER
				3498	"""
				3499	# This is a list of all standard c++ header files, except
				3500	# those already checked for above.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3501	is_cpp_h = include in _CPP_HEADERS
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3502
				3503	if is_system:
				3504	if is_cpp_h:
				3505	return _CPP_SYS_HEADER
				3506	else:
				3507	return _C_SYS_HEADER
				3508
				3509	# If the target file and the include we're checking share a
				3510	# basename when we drop common extensions, and the include
				3511	# lives in . , then it's likely to be owned by the target file.
				3512	target_dir, target_base = (
				3513	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				3514	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				3515	if target_base == include_base and (
				3516	include_dir == target_dir or
				3517	include_dir == os.path.normpath(target_dir + '/../public')):
				3518	return _LIKELY_MY_HEADER
				3519
				3520	# If the target and include share some initial basename
				3521	# component, it's possible the target is implementing the
				3522	# include, so it's allowed to be first, but we'll never
				3523	# complain if it's not there.
				3524	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				3525	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				3526	if (target_first_component and include_first_component and
				3527	target_first_component.group(0) ==
				3528	include_first_component.group(0)):
				3529	return _POSSIBLE_MY_HEADER
				3530
				3531	return _OTHER_HEADER
				3532
				3533
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	3534
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Runs the checks that apply specifically to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if include_match:
    include = include_match.group(2)
    is_system = (include_match.group(1) == '<')
    if include not in include_state:
      # First sighting: remember where it was included.
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      header_type = _ClassifyInclude(fileinfo, include, is_system)
      error_message = include_state.CheckNextIncludeOrder(header_type)
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))
      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
      in_order = include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical_include)
      if not in_order:
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)
      include_state.SetLastHeader(canonical_include)
    else:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))

  # Look for any of the stream classes that are part of standard C++.
  stream_match = _RE_PATTERN_INCLUDE.match(line)
  if not stream_match:
    return
  include = stream_match.group(2)
  # Many unit tests use cout, so we exempt them.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
				3605
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3606
				3607	def _GetTextInside(text, start_pattern):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3608	r"""Retrieves all the text between matching open and close parentheses.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3609
				3610	Given a string of lines and a regular expression string, retrieve all the text
				3611	following the expression and between opening punctuation symbols like
				3612	(, [, or {, and the matching close-punctuation symbol. This properly nested
				3613	occurrences of the punctuations, so for the text like
				3614	printf(a(), b(c()));
				3615	a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
				3616	start_pattern must match string having an open punctuation symbol at the end.
				3617
				3618	Args:
				3619	text: The lines to extract text. Its comments and strings must be elided.
				3620	It can be single line and can span multiple lines.
				3621	start_pattern: The regexp string indicating where to start extracting
				3622	the text.
				3623	Returns:
				3624	The extracted text.
				3625	None if either the opening string or ending punctuation could not be found.
				3626	"""
				3627	# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
				3628	# rewritten to use _GetTextInside (and use inferior regexp matching today).
				3629
				3630	# Give opening punctuations to get the matching close-punctuations.
				3631	matching_punctuation = {'(': ')', '{': '}', '[': ']'}
				3632	closing_punctuation = set(matching_punctuation.itervalues())
				3633
				3634	# Find the position to start extracting text.
				3635	match = re.search(start_pattern, text, re.M)
				3636	if not match: # start_pattern not found in text.
				3637	return None
				3638	start_position = match.end(0)
				3639
				3640	assert start_position > 0, (
				3641	'start_pattern must ends with an opening punctuation.')
				3642	assert text[start_position - 1] in matching_punctuation, (
				3643	'start_pattern must ends with an opening punctuation.')
				3644	# Stack of closing punctuations we expect to have in text after position.
				3645	punctuation_stack = [matching_punctuation[text[start_position - 1]]]
				3646	position = start_position
				3647	while punctuation_stack and position < len(text):
				3648	if text[position] == punctuation_stack[-1]:
				3649	punctuation_stack.pop()
				3650	elif text[position] in closing_punctuation:
				3651	# A closing punctuation without matching opening punctuations.
				3652	return None
				3653	elif text[position] in matching_punctuation:
				3654	punctuation_stack.append(matching_punctuation[text[position]])
				3655	position += 1
				3656	if punctuation_stack:
				3657	# Opening punctuations left without matching close-punctuations.
				3658	return None
				3659	# punctuations match.
				3660	return text[start_position:position - 1]
				3661
				3662
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3663	# Patterns for matching call-by-reference parameters.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3664	#
				3665	# Supports nested templates up to 2 levels deep using this messy pattern:
				3666	# < (?: < (?: < [^<>]*
				3667	# >
				3668	# \| [^<>] )*
				3669	# >
				3670	# \| [^<>] )*
				3671	# >
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3672	_RE_PATTERN_IDENT = r'[_a-zA-Z]\w' # =~ [[:alpha:]][[:alnum:]]
				3673	_RE_PATTERN_TYPE = (
				3674	r'(?:const\s+)?(?:typename\s+\|class\s+\|struct\s+\|union\s+\|enum\s+)?'
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3675	r'(?:\w\|'
				3676	r'\s<(?:<(?:<[^<>]>\|[^<>])>\|[^<>])>\|'
				3677	r'::)+')
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3678	# A call-by-reference parameter ends with '& identifier'.
				3679	_RE_PATTERN_REF_PARAM = re.compile(
				3680	r'(' + _RE_PATTERN_TYPE + r'(?:\s(?:\bconst\b\|[]))\s'
				3681	r'&\s' + _RE_PATTERN_IDENT + r')\s(?:=[^,()]+)?[,)]')
				3682	# A call-by-const-reference parameter either ends with 'const& identifier'
				3683	# or looks like 'const type& identifier' when 'type' is atomic.
				3684	_RE_PATTERN_CONST_REF_PARAM = (
				3685	r'(?:.\s\bconst\s&\s' + _RE_PATTERN_IDENT +
				3686	r'\|const\s+' + _RE_PATTERN_TYPE + r'\s&\s' + _RE_PATTERN_IDENT + r')')
				3687
				3688
				3689	def CheckLanguage(filename, clean_lines, linenum, file_extension,
				3690	include_state, nesting_state, error):
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3691	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				3692
				3693	Some of these rules are hard to test (function overloading, using
				3694	uint32 inappropriately), but we do the best we can.
				3695
				3696	Args:
				3697	filename: The name of the current file.
				3698	clean_lines: A CleansedLines instance containing the file.
				3699	linenum: The number of the line to check.
				3700	file_extension: The extension (without the dot) of the filename.
				3701	include_state: An _IncludeState instance in which the headers are inserted.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3702	nesting_state: A _NestingState instance which maintains information about
				3703	the current stack of nested blocks being parsed.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3704	error: The function to call with any errors found.
				3705	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3706	# If the line is empty or consists of entirely a comment, no need to
				3707	# check it.
				3708	line = clean_lines.elided[linenum]
				3709	if not line:
				3710	return
				3711
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3712	match = _RE_PATTERN_INCLUDE.search(line)
				3713	if match:
				3714	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				3715	return
				3716
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3717	# Reset include state across preprocessor directives. This is meant
				3718	# to silence warnings for conditional includes.
				3719	if Match(r'^\s#\s(?:ifdef\|elif\|else\|endif)\b', line):
				3720	include_state.ResetSection()
				3721
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3722	# Make Windows paths like Unix.
				3723	fullname = os.path.abspath(filename).replace('\\', '/')
				3724
				3725	# TODO(unknown): figure out if they're using default arguments in fn proto.
				3726
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3727	# Check to see if they're using an conversion function cast.
				3728	# I just try to capture the most common basic types, though there are more.
				3729	# Parameterless conversion functions, such as bool(), are allowed as they are
				3730	# probably a member operator declaration or default constructor.
				3731	match = Search(
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3732	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3733	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)'
				3734	r'(\([^)].*)', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3735	if match:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3736	matched_new = match.group(1)
				3737	matched_type = match.group(2)
				3738	matched_funcptr = match.group(3)
				3739
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3740	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				3741	# where type may be float(), int(string), etc. Without context they are
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3742	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				3743	# MockCallback takes a template parameter of the form return_type(arg_type),
				3744	# which looks much like the cast we're trying to detect.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3745	#
				3746	# std::function<> wrapper has a similar problem.
				3747	#
				3748	# Return types for function pointers also look like casts if they
				3749	# don't have an extra space.
				3750	if (matched_new is None and # If new operator, then this isn't a cast
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3751	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3752	Search(r'\bMockCallback<.*>', line) or
				3753	Search(r'\bstd::function<.*>', line)) and
				3754	not (matched_funcptr and
				3755	Match(r'$(?:[^() ]+::\s\\s)?[^() ]+$\s\(',
				3756	matched_funcptr))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3757	# Try a bit harder to catch gmock lines: the only place where
				3758	# something looks like an old-style cast is where we declare the
				3759	# return type of the mocked method, and the only time when we
				3760	# are missing context is if MOCK_METHOD was split across
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3761	# multiple lines. The missing MOCK_METHOD is usually one or two
				3762	# lines back, so scan back one or two lines.
				3763	#
				3764	# It's not possible for gmock macros to appear in the first 2
				3765	# lines, since the class head + section name takes up 2 lines.
				3766	if (linenum < 2 or
				3767	not (Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s$',
				3768	clean_lines.elided[linenum - 1]) or
				3769	Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s$',
				3770	clean_lines.elided[linenum - 2]))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3771	error(filename, linenum, 'readability/casting', 4,
				3772	'Using deprecated casting style. '
				3773	'Use static_cast<%s>(...) instead' %
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3774	matched_type)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3775
				3776	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3777	'static_cast',
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3778	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$', error)
				3779
				3780	# This doesn't catch all cases. Consider (const char * const)"hello".
				3781	#
				3782	# (char *) "foo" should always be a const_cast (reinterpret_cast won't
				3783	# compile).
				3784	if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3785	'const_cast', r'$(char\s?\+\s?)$\s"', error):
				3786	pass
				3787	else:
				3788	# Check pointer casts for other than string constants
				3789	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3790	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3791
				3792	# In addition, we look for people taking the address of a cast. This
				3793	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				3794	# point where you think.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3795	match = Search(
				3796	r'(?:&$([^)]+)$[\w(])\|'
				3797	r'(?:&(static\|dynamic\|down\|reinterpret)_cast\b)', line)
				3798	if match and match.group(1) != '*':
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3799	error(filename, linenum, 'runtime/casting', 4,
				3800	('Are you taking an address of a cast? '
				3801	'This is dangerous: could be a temp var. '
				3802	'Take the address before doing the cast, rather than after'))
				3803
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3804	# Create an extended_line, which is the concatenation of the current and
				3805	# next lines, for more effective checking of code that may span more than one
				3806	# line.
				3807	if linenum + 1 < clean_lines.NumLines():
				3808	extended_line = line + clean_lines.elided[linenum + 1]
				3809	else:
				3810	extended_line = line
				3811
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3812	# Check for people declaring static/global STL strings at the top level.
				3813	# This is dangerous because the C++ language does not guarantee that
				3814	# globals with constructors are initialized before the first access.
				3815	match = Match(
				3816	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				3817	line)
				3818	# Make sure it's not a function.
				3819	# Function template specialization looks like: "string foo<Type>(...".
				3820	# Class template definitions look like: "string Foo<Type>::Method(...".
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame^]	3821	#
				3822	# Also ignore things that look like operators. These are matched separately
				3823	# because operator names cross non-word boundaries. If we change the pattern
				3824	# above, we would decrease the accuracy of matching identifiers.
				3825	if (match and
				3826	not Search(r'\boperator\W', line) and
				3827	not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)', match.group(3))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3828	error(filename, linenum, 'runtime/string', 4,
				3829	'For a static/global string constant, use a C style string instead: '
				3830	'"%schar %s[]".' %
				3831	(match.group(1), match.group(2)))
				3832
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3833	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				3834	error(filename, linenum, 'runtime/init', 4,
				3835	'You seem to be initializing a member variable with itself.')
				3836
				3837	if file_extension == 'h':
				3838	# TODO(unknown): check that 1-arg constructors are explicit.
				3839	# How to tell it's a constructor?
				3840	# (handled in CheckForNonStandardConstructs for now)
				3841	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				3842	# (level 1 error)
				3843	pass
				3844
				3845	# Check if people are using the verboten C basic types. The only exception
				3846	# we regularly allow is "unsigned short port" for port.
				3847	if Search(r'\bshort port\b', line):
				3848	if not Search(r'\bunsigned short port\b', line):
				3849	error(filename, linenum, 'runtime/int', 4,
				3850	'Use "unsigned short" for ports, not "short"')
				3851	else:
				3852	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				3853	if match:
				3854	error(filename, linenum, 'runtime/int', 4,
				3855	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				3856
				3857	# When snprintf is used, the second argument shouldn't be a literal.
				3858	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	3859	if match and match.group(2) != '0':
				3860	# If 2nd arg is zero, snprintf is used to calculate size.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3861	error(filename, linenum, 'runtime/printf', 3,
				3862	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				3863	'to snprintf.' % (match.group(1), match.group(2)))
				3864
				3865	# Check if some verboten C functions are being used.
				3866	if Search(r'\bsprintf\b', line):
				3867	error(filename, linenum, 'runtime/printf', 5,
				3868	'Never use sprintf. Use snprintf instead.')
				3869	match = Search(r'\b(strcpy\|strcat)\b', line)
				3870	if match:
				3871	error(filename, linenum, 'runtime/printf', 4,
				3872	'Almost always, snprintf is better than %s' % match.group(1))
				3873
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3874	# Check if some verboten operator overloading is going on
				3875	# TODO(unknown): catch out-of-line unary operator&:
				3876	# class X {};
				3877	# int operator&(const X& x) { return 42; } // unary operator&
				3878	# The trick is it's hard to tell apart from binary operator&:
				3879	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				3880	if Search(r'\boperator\s&\s$\s*$', line):
				3881	error(filename, linenum, 'runtime/operator', 4,
				3882	'Unary operator& is dangerous. Do not use it.')
				3883
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3884	# Check for suspicious usage of "if" like
				3885	# } if (a == b) {
				3886	if Search(r'\}\sif\s\(', line):
				3887	error(filename, linenum, 'readability/braces', 4,
				3888	'Did you mean "else if"? If not, start a new line for "if".')
				3889
				3890	# Check for potential format string bugs like printf(foo).
				3891	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				3892	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3893	# TODO(sugawarayu): Catch the following case. Need to change the calling
				3894	# convention of the whole function to process multiple line to handle it.
				3895	# printf(
				3896	# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
				3897	printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
				3898	if printf_args:
				3899	match = Match(r'([\w.\->()]+)$', printf_args)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3900	if match and match.group(1) != '__VA_ARGS__':
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3901	function_name = re.search(r'\b((?:string)?printf)\s*\(',
				3902	line, re.I).group(1)
				3903	error(filename, linenum, 'runtime/printf', 4,
				3904	'Potential format string bug. Do %s("%%s", %s) instead.'
				3905	% (function_name, match.group(1)))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3906
				3907	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				3908	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				3909	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				3910	error(filename, linenum, 'runtime/memset', 4,
				3911	'Did you mean "memset(%s, 0, %s)"?'
				3912	% (match.group(1), match.group(2)))
				3913
				3914	if Search(r'\busing namespace\b', line):
				3915	error(filename, linenum, 'build/namespaces', 5,
				3916	'Do not use namespace using-directives. '
				3917	'Use using-declarations instead.')
				3918
				3919	# Detect variable-length arrays.
				3920	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				3921	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				3922	match.group(3).find(']') == -1):
				3923	# Split the size using space and arithmetic operators as delimiters.
				3924	# If any of the resulting tokens are not compile time constants then
				3925	# report the error.
				3926	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				3927	is_const = True
				3928	skip_next = False
				3929	for tok in tokens:
				3930	if skip_next:
				3931	skip_next = False
				3932	continue
				3933
				3934	if Search(r'sizeof$.+$', tok): continue
				3935	if Search(r'arraysize$\w+$', tok): continue
				3936
				3937	tok = tok.lstrip('(')
				3938	tok = tok.rstrip(')')
				3939	if not tok: continue
				3940	if Match(r'\d+', tok): continue
				3941	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				3942	if Match(r'k[A-Z0-9]\w*', tok): continue
				3943	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				3944	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				3945	# A catch all for tricky sizeof cases, including 'sizeof expression',
				3946	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3947	# requires skipping the next token because we split on ' ' and '*'.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3948	if tok.startswith('sizeof'):
				3949	skip_next = True
				3950	continue
				3951	is_const = False
				3952	break
				3953	if not is_const:
				3954	error(filename, linenum, 'runtime/arrays', 1,
				3955	'Do not use variable-length arrays. Use an appropriately named '
				3956	"('k' followed by CamelCase) compile-time constant for the size.")
				3957
				3958	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				3959	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				3960	# in the class declaration.
				3961	match = Match(
				3962	(r'\s*'
				3963	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				3964	r'$.*$;$'),
				3965	line)
				3966	if match and linenum + 1 < clean_lines.NumLines():
				3967	next_line = clean_lines.elided[linenum + 1]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3968	# We allow some, but not all, declarations of variables to be present
				3969	# in the statement that defines the class. The [\w\,\s] fragment of
				3970	# the regular expression below allows users to declare instances of
				3971	# the class or pointers to instances, but not less common types such
				3972	# as function pointers or arrays. It's a tradeoff between allowing
				3973	# reasonable code and avoiding trying to parse more C++ using regexps.
				3974	if not Search(r'^\s}[\w\,\s]*;', next_line):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3975	error(filename, linenum, 'readability/constructors', 3,
				3976	match.group(1) + ' should be the last thing in the class')
				3977
				3978	# Check for use of unnamed namespaces in header files. Registration
				3979	# macros are typically OK, so we allow use of "namespace {" on lines
				3980	# that end with backslashes.
				3981	if (file_extension == 'h'
				3982	and Search(r'\bnamespace\s*{', line)
				3983	and line[-1] != '\\'):
				3984	error(filename, linenum, 'build/namespaces', 4,
				3985	'Do not use unnamed namespaces in header files. See '
				3986	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				3987	' for more information.')
				3988
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references in function parameters.

  Separate from CheckLanguage since it scans backwards from the current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive. If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters. A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
        isinstance(nesting_state.stack[-1], _NamespaceInfo)):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], _ClassInfo) or
        isinstance(nesting_state.stack[-2], _NamespaceInfo)):
      check_params = True  # just opened global/class/namespace block

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>". Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line. Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list. Try a bit harder to catch this case
    # by looking at up to two preceding lines.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4100
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4101
				4102	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				4103	error):
				4104	"""Checks for a C-style cast by looking for the pattern.
				4105
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4106	Args:
				4107	filename: The name of the current file.
				4108	linenum: The number of the line to check.
				4109	line: The line of code to check.
				4110	raw_line: The raw line of code to check, with comments.
				4111	cast_type: The string for the C++ cast to recommend. This is either
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4112	reinterpret_cast, static_cast, or const_cast, depending.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4113	pattern: The regular expression used to find C-style casts.
				4114	error: The function to call with any errors found.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4115
				4116	Returns:
				4117	True if an error was emitted.
				4118	False otherwise.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4119	"""
				4120	match = Search(pattern, line)
				4121	if not match:
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4122	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4123
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4124	# Exclude lines with sizeof, since sizeof looks like a cast.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4125	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				4126	if sizeof_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4127	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4128
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	4129	# operator++(int) and operator--(int)
				4130	if (line[0:match.start(1) - 1].endswith(' operator++') or
				4131	line[0:match.start(1) - 1].endswith(' operator--')):
				4132	return False
				4133
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4134	# A single unnamed argument for a function tends to look like old
				4135	# style cast. If we see those, don't issue warnings for deprecated
				4136	# casts, instead issue warnings for unnamed arguments where
				4137	# appropriate.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4138	#
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4139	# These are things that we want warnings for, since the style guide
				4140	# explicitly require all parameters to be named:
				4141	# Function(int);
				4142	# Function(int) {
				4143	# ConstMember(int) const;
				4144	# ConstMember(int) const {
				4145	# ExceptionMember(int) throw (...);
				4146	# ExceptionMember(int) throw (...) {
				4147	# PureVirtual(int) = 0;
				4148	#
				4149	# These are functions of some sort, where the compiler would be fine
				4150	# if they had named parameters, but people often omit those
				4151	# identifiers to reduce clutter:
				4152	# (FunctionPointer)(int);
				4153	# (FunctionPointer)(int) = value;
				4154	# Function((function_pointer_arg)(int))
				4155	# <TemplateArgument(int)>;
				4156	# <(FunctionPointerTemplateArgument)(int)>;
				4157	remainder = line[match.end(0):]
				4158	if Match(r'^\s*(?:;\|const\b\|throw\b\|=\|>\|\{\|\))', remainder):
				4159	# Looks like an unnamed parameter.
				4160
				4161	# Don't warn on any kind of template arguments.
				4162	if Match(r'^\s*>', remainder):
				4163	return False
				4164
				4165	# Don't warn on assignments to function pointers, but keep warnings for
				4166	# unnamed parameters to pure virtual functions. Note that this pattern
				4167	# will also pass on assignments of "0" to function pointers, but the
				4168	# preferred values for those would be "nullptr" or "NULL".
				4169	matched_zero = Match(r'^\s=\s(\S+)\s;', remainder)
				4170	if matched_zero and matched_zero.group(1) != '0':
				4171	return False
				4172
				4173	# Don't warn on function pointer declarations. For this we need
				4174	# to check what came before the "(type)" string.
				4175	if Match(r'.\)\s$', line[0:match.start(0)]):
				4176	return False
				4177
				4178	# Don't warn if the parameter is named with block comments, e.g.:
				4179	# Function(int /unused_param/);
				4180	if '/*' in raw_line:
				4181	return False
				4182
				4183	# Passed all filters, issue warning here.
				4184	error(filename, linenum, 'readability/function', 3,
				4185	'All parameters should be named in a function')
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4186	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4187
				4188	# At this point, all that should be left is actual casts.
				4189	error(filename, linenum, 'readability/casting', 4,
				4190	'Using C-style cast. Use %s<%s>(...) instead' %
				4191	(cast_type, match.group(1)))
				4192
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4193	return True
				4194
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4195
				4196	_HEADERS_CONTAINING_TEMPLATES = (
				4197	('<deque>', ('deque',)),
				4198	('<functional>', ('unary_function', 'binary_function',
				4199	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				4200	'negate',
				4201	'equal_to', 'not_equal_to', 'greater', 'less',
				4202	'greater_equal', 'less_equal',
				4203	'logical_and', 'logical_or', 'logical_not',
				4204	'unary_negate', 'not1', 'binary_negate', 'not2',
				4205	'bind1st', 'bind2nd',
				4206	'pointer_to_unary_function',
				4207	'pointer_to_binary_function',
				4208	'ptr_fun',
				4209	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				4210	'mem_fun_ref_t',
				4211	'const_mem_fun_t', 'const_mem_fun1_t',
				4212	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				4213	'mem_fun_ref',
				4214	)),
				4215	('<limits>', ('numeric_limits',)),
				4216	('<list>', ('list',)),
				4217	('<map>', ('map', 'multimap',)),
				4218	('<memory>', ('allocator',)),
				4219	('<queue>', ('queue', 'priority_queue',)),
				4220	('<set>', ('set', 'multiset',)),
				4221	('<stack>', ('stack',)),
				4222	('<string>', ('char_traits', 'basic_string',)),
				4223	('<utility>', ('pair',)),
				4224	('<vector>', ('vector',)),
				4225
				4226	# gcc extensions.
				4227	# Note: std::hash is their hash, ::hash is our hash
				4228	('<hash_map>', ('hash_map', 'hash_multimap',)),
				4229	('<hash_set>', ('hash_set', 'hash_multiset',)),
				4230	('<slist>', ('slist',)),
				4231	)
				4232
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4233	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				4234
				4235	_re_pattern_algorithm_header = []
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4236	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				4237	'transform'):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4238	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				4239	# type::max().
				4240	_re_pattern_algorithm_header.append(
				4241	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				4242	_template,
				4243	'<algorithm>'))
				4244
				4245	_re_pattern_templates = []
				4246	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				4247	for _template in _templates:
				4248	_re_pattern_templates.append(
				4249	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				4250	_template + '<>',
				4251	_header))
				4252
				4253
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """

  # Reduce the .cc path to its module stem: drop the extension, then a
  # test suffix, then any public/internal directory component.
  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  if filename_cc.endswith('_unittest'):
    filename_cc = filename_cc[:-len('_unittest')]
  elif filename_cc.endswith('_test'):
    filename_cc = filename_cc[:-len('_test')]
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  # Same normalization for the header, with '-inl' as the optional suffix.
  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  files_belong_to_same_module = filename_cc.endswith(filename_h)
  common_path = ''
  if files_belong_to_same_module:
    # Slice with an explicit end index: the negative form
    # [:-len(filename_h)] collapses to [:0] when filename_h is empty and
    # would throw the whole prefix away.
    common_path = filename_cc[:len(filename_cc) - len(filename_h)]
  return files_belong_to_same_module, common_path
				4307
				4308
				4309	def UpdateIncludeState(filename, include_state, io=codecs):
				4310	"""Fill up the include_state with new includes found from the file.
				4311
				4312	Args:
				4313	filename: the name of the header to read.
				4314	include_state: an _IncludeState instance in which the headers are inserted.
				4315	io: The io factory to use to read the file. Provided for testability.
				4316
				4317	Returns:
				4318	True if a header was succesfully added. False otherwise.
				4319	"""
				4320	headerfile = None
				4321	try:
				4322	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				4323	except IOError:
				4324	return False
				4325	linenum = 0
				4326	for line in headerfile:
				4327	linenum += 1
				4328	clean_line = CleanseComments(line)
				4329	match = _RE_PATTERN_INCLUDE.search(clean_line)
				4330	if match:
				4331	include = match.group(2)
				4332	# The value formatting is cute, but not really used right now.
				4333	# What matters here is that the key is in include_state.
				4334	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				4335	return True
				4336
				4337
				4338	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				4339	io=codecs):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4340	"""Reports for missing stl includes.
				4341
				4342	This function will output warnings to make sure you are including the headers
				4343	necessary for the stl containers and functions that you use. We only give one
				4344	reason to include a header. For example, if you use both equal_to<> and
				4345	less<> in a .h file, only one (the latter in the file) of these will be
				4346	reported as a reason to include the <functional>.
				4347
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4348	Args:
				4349	filename: The name of the current file.
				4350	clean_lines: A CleansedLines instance containing the file.
				4351	include_state: An _IncludeState instance.
				4352	error: The function to call with any errors found.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4353	io: The IO factory to use to read the header file. Provided for unittest
				4354	injection.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4355	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4356	required = {} # A map of header name to linenumber and the template entity.
				4357	# Example of required: { '<functional>': (1219, 'less<>') }
				4358
				4359	for linenum in xrange(clean_lines.NumLines()):
				4360	line = clean_lines.elided[linenum]
				4361	if not line or line[0] == '#':
				4362	continue
				4363
				4364	# String is special -- it is a non-templatized type in STL.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4365	matched = _RE_PATTERN_STRING.search(line)
				4366	if matched:
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4367	# Don't warn about strings in non-STL namespaces:
				4368	# (We check only the first match per line; good enough.)
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4369	prefix = line[:matched.start()]
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4370	if prefix.endswith('std::') or not prefix.endswith('::'):
				4371	required['<string>'] = (linenum, 'string')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4372
				4373	for pattern, template, header in _re_pattern_algorithm_header:
				4374	if pattern.search(line):
				4375	required[header] = (linenum, template)
				4376
				4377	# The following function is just a speed up, no semantics are changed.
				4378	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				4379	continue
				4380
				4381	for pattern, template, header in _re_pattern_templates:
				4382	if pattern.search(line):
				4383	required[header] = (linenum, template)
				4384
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4385	# The policy is that if you #include something in foo.h you don't need to
				4386	# include it again in foo.cc. Here, we will look at possible includes.
				4387	# Let's copy the include_state so it is only messed up within this function.
				4388	include_state = include_state.copy()
				4389
				4390	# Did we find the header for this file (if any) and succesfully load it?
				4391	header_found = False
				4392
				4393	# Use the absolute path so that matching works properly.
erg@google.com	90ecb62	2012-01-30 19:34:23 +0000	[diff] [blame]	4394	abs_filename = FileInfo(filename).FullName()
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4395
				4396	# For Emacs's flymake.
				4397	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				4398	# by flymake and that file name might end with '_flymake.cc'. In that case,
				4399	# restore original file name here so that the corresponding header file can be
				4400	# found.
				4401	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				4402	# instead of 'foo_flymake.h'
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4403	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4404
				4405	# include_state is modified during iteration, so we iterate over a copy of
				4406	# the keys.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4407	header_keys = include_state.keys()
				4408	for header in header_keys:
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4409	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				4410	fullpath = common_path + header
				4411	if same_module and UpdateIncludeState(fullpath, include_state, io):
				4412	header_found = True
				4413
				4414	# If we can't find the header file for a .cc, assume it's because we don't
				4415	# know where to look. In that case we'll give up as we're not sure they
				4416	# didn't include it in the .h file.
				4417	# TODO(unknown): Do a better job of finding .h files so we are confident that
				4418	# not having the .h file means there isn't one.
				4419	if filename.endswith('.cc') and not header_found:
				4420	return
				4421
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4422	# All the lines have been processed, report the errors found.
				4423	for required_header_unstripped in required:
				4424	template = required[required_header_unstripped][1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4425	if required_header_unstripped.strip('<>"') not in include_state:
				4426	error(filename, required[required_header_unstripped][0],
				4427	'build/include_what_you_use', 4,
				4428	'Add #include ' + required_header_unstripped + ' for ' + template)
				4429
				4430
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Reports make_pair calls that spell out their template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(elided_line) is None:
    # Nothing that looks like "make_pair<" on this line.
    return

  message = ('For C++11-compatibility, omit template arguments from make_pair'
             ' OR use pair directly OR if appropriate, construct a pair directly')
  high_confidence = 4
  error(filename, linenum, 'build/explicit_make_pair', high_confidence, message)
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4453
				4454
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Records NOLINT suppressions for the line and updates the nesting state,
  then runs every per-line check.  If the innermost nesting block reports an
  inline_asm state other than _NO_ASM, all remaining checks are skipped.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute
                 holds the unprocessed text of each line.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, error category, confidence level, and
           message.
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # A None default replaces the former shared mutable [] default; treat it
  # (and any other empty value) as "no extra checks".
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4495
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Runs the whole-file checks (copyright, header guard for 'h' files,
  include-what-you-use, bad characters, newline at EOF) as well as
  ProcessLine on every cleansed line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
           The caller's list is not modified; a padded copy is used.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, error category, confidence level, and
           message.
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # Avoid a shared mutable default; always hand ProcessLine a real list.
  if extra_check_functions is None:
    extra_check_functions = []

  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
				4540
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-') as UTF-8 with undecodable
  bytes replaced, strips trailing carriage returns, and lints the result if
  the extension is one of the accepted ones.  Progress and skip messages are
  written to sys.stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # Avoid a shared mutable default; always pass a real list downstream.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # Close the handle explicitly instead of leaving it to the garbage
      # collector.
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source_file.read().split('\n')
      finally:
        source_file.close()

    carriage_return_found = False
    # Remove trailing '\r'.
    for linenum, text in enumerate(lines):
      if text.endswith('\r'):
        lines[linenum] = text.rstrip('\r')
        carriage_return_found = True

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  valid_extensions = ['cc', 'h', 'cpp', 'cu', 'cuh']
  if filename != '-' and file_extension not in valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(.cc, .h, .cpp, .cu, .cuh)\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
				4608
				4609
				4610	def PrintUsage(message):
				4611	"""Prints a brief usage string and exits, optionally with an error message.
				4612
				4613	Args:
				4614	message: The optional error message.
				4615	"""
				4616	sys.stderr.write(_USAGE)
				4617	if message:
				4618	sys.exit('\nFATAL ERROR: ' + message)
				4619	else:
				4620	sys.exit(1)
				4621
				4622
				4623	def PrintCategories():
				4624	"""Prints a list of all the error-categories used by error messages.
				4625
				4626	These are the categories used to filter messages via --filter.
				4627	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4628	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4629	sys.exit(0)
				4630
				4631
				4632	def ParseArguments(args):
				4633	"""Parses the command line arguments.
				4634
				4635	This may set the output format and verbosity level as side-effects.
				4636
				4637	Args:
				4638	args: The command line arguments:
				4639
				4640	Returns:
				4641	The list of filenames to lint.
				4642	"""
				4643	try:
				4644	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4645	'counting=',
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4646	'filter=',
				4647	'root='])
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4648	except getopt.GetoptError:
				4649	PrintUsage('Invalid arguments.')
				4650
				4651	verbosity = _VerboseLevel()
				4652	output_format = _OutputFormat()
				4653	filters = ''
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4654	counting_style = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4655
				4656	for (opt, val) in opts:
				4657	if opt == '--help':
				4658	PrintUsage(None)
				4659	elif opt == '--output':
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4660	if val not in ('emacs', 'vs7', 'eclipse'):
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	4661	PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4662	output_format = val
				4663	elif opt == '--verbose':
				4664	verbosity = int(val)
				4665	elif opt == '--filter':
				4666	filters = val
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4667	if not filters:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4668	PrintCategories()
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4669	elif opt == '--counting':
				4670	if val not in ('total', 'toplevel', 'detailed'):
				4671	PrintUsage('Valid counting options are total, toplevel, and detailed')
				4672	counting_style = val
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4673	elif opt == '--root':
				4674	global _root
				4675	_root = val
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4676
				4677	if not filenames:
				4678	PrintUsage('No files were specified.')
				4679
				4680	_SetOutputFormat(output_format)
				4681	_SetVerboseLevel(verbosity)
				4682	_SetFilters(filters)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4683	_SetCountingStyle(counting_style)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4684
				4685	return filenames
				4686
				4687
				4688	def main():
				4689	filenames = ParseArguments(sys.argv[1:])
				4690
				4691	# Change stderr to write with replacement characters so we don't die
				4692	# if we try to print something containing non-ASCII characters.
				4693	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				4694	codecs.getreader('utf8'),
				4695	codecs.getwriter('utf8'),
				4696	'replace')
				4697
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4698	_cpplint_state.ResetErrorCounts()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4699	for filename in filenames:
				4700	ProcessFile(filename, _cpplint_state.verbose_level)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4701	_cpplint_state.PrintErrorCounts()
				4702
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4703	sys.exit(_cpplint_state.error_count > 0)
				4704
				4705
				4706	if __name__ == '__main__':
				4707	main()