Blame - cpplint/cpplint.py - platform/external/google-styleguide

blob: b5493b6d986443c7650e169eb160d0c4642d86a2 [file] [log] [blame]

erg@google.com	720121a	2012-05-11 16:31:47 +0000	[diff] [blame]	1	#!/usr/bin/python
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2	#
erg@google.com	8f91ab2	2011-09-06 21:04:45 +0000	[diff] [blame]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	8	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	18	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	30
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	31	"""Does google-lint on c++ files.
				32
				33	The goal of this script is to identify places in the code that may
				34	be in non-compliance with google style. It does not attempt to fix
				35	up these problems -- the point is to educate. It does also not
				36	attempt to find all problems, or to ensure that everything it does
				37	find is legitimately a problem.
				38
				39	In particular, we can get very confused by /* and // inside strings!
				40	We do a small hack, which is to ignore //'s with "'s after them on the
				41	same line, but it is far from perfect (in either direction).
				42	"""
				43
				44	import codecs
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	45	import copy
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	46	import getopt
				47	import math # for log
				48	import os
				49	import re
				50	import sre_compile
				51	import string
				52	import sys
				53	import unicodedata
				54
				55
				56	_USAGE = """
				57	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame]	58	[--counting=total\|toplevel\|detailed] [--root=subdir]
				59	[--linelength=digits]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	60	<file> [file] ...
				61
				62	The style guidelines this tries to follow are those in
				63	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				64
				65	Every problem is given a confidence score from 1-5, with 5 meaning we are
				66	certain of the problem, and 1 meaning it could be a legitimate construct.
				67	This will miss some errors, and is not a substitute for a code review.
				68
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	69	To suppress false-positive errors of a certain category, add a
				70	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				71	suppresses errors of all categories on that line.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	72
				73	The files passed in will be linted; at least one file must be provided.
				74	Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
				75
				76	Flags:
				77
				78	output=vs7
				79	By default, the output is formatted to ease emacs parsing. Visual Studio
				80	compatible output (vs7) may also be used. Other formats are unsupported.
				81
				82	verbose=#
				83	Specify a number 0-5 to restrict errors to certain verbosity levels.
				84
				85	filter=-x,+y,...
				86	Specify a comma-separated list of category-filters to apply: only
				87	error messages whose category names pass the filters will be printed.
				88	(Category names are printed with the message and look like
				89	"[whitespace/indent]".) Filters are evaluated left to right.
				90	"-FOO" and "FOO" means "do not print categories that start with FOO".
				91	"+FOO" means "do print categories that start with FOO".
				92
				93	Examples: --filter=-whitespace,+whitespace/braces
				94	--filter=whitespace,runtime/printf,+runtime/printf_format
				95	--filter=-,+build/include_what_you_use
				96
				97	To see a list of all the categories used in cpplint, pass no arg:
				98	--filter=
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	99
				100	counting=total\|toplevel\|detailed
				101	The total number of errors found is always printed. If
				102	'toplevel' is provided, then the count of errors in each of
				103	the top-level categories like 'build' and 'whitespace' will
				104	also be printed. If 'detailed' is provided, then a count
				105	is provided for each category like 'build/class'.
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	106
				107	root=subdir
				108	The root directory used for deriving header guard CPP variable.
				109	By default, the header guard CPP variable is calculated as the relative
				110	path to the directory that contains .git, .hg, or .svn. When this flag
				111	is specified, the relative path is calculated from the specified
				112	directory. If the specified directory does not exist, this flag is
				113	ignored.
				114
				115	Examples:
				116	Assuing that src/.git exists, the header guard CPP variables for
				117	src/chrome/browser/ui/browser.h are:
				118
				119	No flag => CHROME_BROWSER_UI_BROWSER_H_
				120	--root=chrome => BROWSER_UI_BROWSER_H_
				121	--root=chrome/browser => UI_BROWSER_H_
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame]	122
				123	linelength=digits
				124	This is the allowed line length for the project. The default value is
				125	80 characters.
				126
				127	Examples:
				128	--linelength=120
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	129	"""
				130
				131	# We categorize each error message we print. Here are the categories.
				132	# We want an explicit list so we can list them all in cpplint --filter=.
				133	# If you add a new error message with a new category, add it to the list
				134	# here! cpplint_unittest.py should tell you if you forget to do this.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	135	_ERROR_CATEGORIES = [
				136	'build/class',
				137	'build/deprecated',
				138	'build/endif_comment',
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	139	'build/explicit_make_pair',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	140	'build/forward_decl',
				141	'build/header_guard',
				142	'build/include',
				143	'build/include_alpha',
				144	'build/include_order',
				145	'build/include_what_you_use',
				146	'build/namespaces',
				147	'build/printf_format',
				148	'build/storage_class',
				149	'legal/copyright',
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	150	'readability/alt_tokens',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	151	'readability/braces',
				152	'readability/casting',
				153	'readability/check',
				154	'readability/constructors',
				155	'readability/fn_size',
				156	'readability/function',
				157	'readability/multiline_comment',
				158	'readability/multiline_string',
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	159	'readability/namespace',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	160	'readability/nolint',
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	161	'readability/nul',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	162	'readability/streams',
				163	'readability/todo',
				164	'readability/utf8',
				165	'runtime/arrays',
				166	'runtime/casting',
				167	'runtime/explicit',
				168	'runtime/int',
				169	'runtime/init',
				170	'runtime/invalid_increment',
				171	'runtime/member_string_references',
				172	'runtime/memset',
				173	'runtime/operator',
				174	'runtime/printf',
				175	'runtime/printf_format',
				176	'runtime/references',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	177	'runtime/string',
				178	'runtime/threadsafe_fn',
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	179	'runtime/vlog',
				180	'whitespace/blank_line',
				181	'whitespace/braces',
				182	'whitespace/comma',
				183	'whitespace/comments',
				184	'whitespace/empty_conditional_body',
				185	'whitespace/empty_loop_body',
				186	'whitespace/end_of_line',
				187	'whitespace/ending_newline',
				188	'whitespace/forcolon',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	189	'whitespace/indent',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	190	'whitespace/line_length',
				191	'whitespace/newline',
				192	'whitespace/operators',
				193	'whitespace/parens',
				194	'whitespace/semicolon',
				195	'whitespace/tab',
				196	'whitespace/todo'
				197	]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	198
# Starting contents of the category filter; it is overridden by the --filter=
# flag.  Every error is on by default, so list here only the categories that
# should start out disabled (i.e., those that have to be switched on through
# --filter=).  Each entry begins with '-' or '+', exactly as in the flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	204
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	205	# We used to check for high-bit characters, but after much discussion we
				206	# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	207	# hard-coded international strings, which belong in a separate i18n file.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	208
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	209
# Names of headers that are treated as C++ headers.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h', 'algo.h', 'alloc.h', 'builtinbuf.h', 'bvector.h',
    'complex.h', 'defalloc.h', 'deque.h', 'editbuf.h', 'fstream.h',
    'function.h', 'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'hashtable.h', 'heap.h', 'indstream.h', 'iomanip.h', 'iostream.h',
    'istream.h', 'iterator.h', 'list.h', 'map.h', 'multimap.h',
    'multiset.h', 'ostream.h', 'pair.h', 'parsestream.h', 'pfstream.h',
    'procbuf.h', 'pthread_alloc', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'set.h', 'slist', 'slist.h', 'stack.h',
    'stdiostream.h', 'stl_alloc.h', 'stl_relops.h', 'streambuf.h',
    'stream.h', 'strfile.h', 'strstream.h', 'tempbuf.h', 'tree.h',
    'type_traits.h', 'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm', 'array', 'atomic', 'bitset', 'chrono', 'codecvt',
    'complex', 'condition_variable', 'deque', 'exception', 'forward_list',
    'fstream', 'functional', 'future', 'initializer_list', 'iomanip',
    'ios', 'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list',
    'locale', 'map', 'memory', 'mutex', 'new', 'numeric', 'ostream',
    'queue', 'random', 'ratio', 'regex', 'set', 'sstream', 'stack',
    'stdexcept', 'streambuf', 'string', 'strstream', 'system_error',
    'thread', 'tuple', 'typeindex', 'typeinfo', 'type_traits',
    'unordered_map', 'unordered_set', 'utility', 'valarray', 'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert', 'ccomplex', 'cctype', 'cerrno', 'cfenv', 'cfloat',
    'cinttypes', 'ciso646', 'climits', 'clocale', 'cmath', 'csetjmp',
    'csignal', 'cstdalign', 'cstdarg', 'cstdbool', 'cstddef', 'cstdint',
    'cstdio', 'cstdlib', 'cstring', 'ctgmath', 'ctime', 'cuchar',
    'cwchar', 'cwctype',
    ])
				345
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  The _M variants are listed ahead of their plain
# counterparts because substring matching relies on that order.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# For each macro above: a map from comparison operator to the more specific
# macro that should replace CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE/... when the
# checked expression uses that operator.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Macros that assert the condition holds map an operator to its own suffix.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for macro, template in [('DCHECK', 'DCHECK_%s'),
                          ('CHECK', 'CHECK_%s'),
                          ('EXPECT_TRUE', 'EXPECT_%s'),
                          ('ASSERT_TRUE', 'ASSERT_%s'),
                          ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                          ('ASSERT_TRUE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % replacement

# Macros that assert the condition is false map an operator to the suffix of
# the inverse comparison.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for macro, template in [('EXPECT_FALSE', 'EXPECT_%s'),
                          ('ASSERT_FALSE', 'ASSERT_%s'),
                          ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                          ('ASSERT_FALSE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[macro][op] = template % inv_replacement
				377
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
# The keywords are joined with a bare '|' so that they form a regex
# alternation; the trailing lookahead requires a space, '(' or end of line
# after the keyword.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
				404
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	405
				406	# These constants define types of headers for use with
				407	# _IncludeState.CheckNextIncludeOrder().
				408	_C_SYS_HEADER = 1
				409	_CPP_SYS_HEADER = 2
				410	_LIKELY_MY_HEADER = 3
				411	_POSSIBLE_MY_HEADER = 4
				412	_OTHER_HEADER = 5
				413
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	414	# These constants define the current inline assembly state
				415	_NO_ASM = 0 # Outside of inline assembly block
				416	_INSIDE_ASM = 1 # Inside inline assembly block
				417	_END_ASM = 2 # Last line of inline assembly block
				418	_BLOCK_ASM = 3 # The whole block is an inline assembly block
				419
				420	# Match start of assembly blocks
				421	_MATCH_ASM = re.compile(r'^\s*(?:asm\|_asm\|__asm\|__asm__)'
				422	r'(?:\s+(volatile\|__volatile__))?'
				423	r'\s*[{(]')
				424
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	425
				426	_regexp_compile_cache = {}
				427
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	428	# Finds occurrences of NOLINT or NOLINT(...).
				429	_RE_SUPPRESSION = re.compile(r'\bNOLINT\b($[^)]*$)?')
				430
				431	# {str, set(int)}: a map from error categories to sets of linenumbers
				432	# on which those errors are expected and should be suppressed.
				433	_error_suppressions = {}
				434
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	435	# The root directory used for deriving header guard CPP variable.
				436	# This is set by --root flag.
				437	_root = None
				438
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame]	439	# The allowed line length of files.
				440	# This is set by --linelength flag.
				441	_line_length = 80
				442
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression, if any, found on one source line.

  Looks for a NOLINT comment in raw_line and adds linenum to the global
  _error_suppressions map, under the named category or under None when the
  comment suppresses everything.  A category that is not listed in
  _ERROR_CATEGORIES is reported through the error callback instead.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  spec = found.group(1)
  if spec in (None, '(*)'):  # => "suppress all"
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  # Drop the surrounding parentheses to get the bare category name.
  category = spec[1:-1]
  if category in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(category, set()).add(linenum)
  else:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % category)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	470
				471
				472	def ResetNolintSuppressions():
				473	"Resets the set of NOLINT suppressions to empty."
				474	_error_suppressions.clear()
				475
				476
				477	def IsErrorSuppressedByNolint(category, linenum):
				478	"""Returns true if the specified error category is suppressed on this line.
				479
				480	Consults the global error_suppressions map populated by
				481	ParseNolintSuppressions/ResetNolintSuppressions.
				482
				483	Args:
				484	category: str, the category of the error.
				485	linenum: int, the current line number.
				486	Returns:
				487	bool, True iff the error should be suppressed due to a NOLINT comment.
				488	"""
				489	return (linenum in _error_suppressions.get(category, set()) or
				490	linenum in _error_suppressions.get(None, set()))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	491
				492	def Match(pattern, s):
				493	"""Matches the string with the pattern, caching the compiled regexp."""
				494	# The regexp compilation caching is inlined in both Match and Search for
				495	# performance reasons; factoring it out into a separate function turns out
				496	# to be noticeably expensive.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	497	if pattern not in _regexp_compile_cache:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	498	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				499	return _regexp_compile_cache[pattern].match(s)
				500
				501
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  compiled = _regexp_compile_cache.get(pattern)
  if compiled is None:
    # re.compile is the public entry point; sre_compile is an internal module.
    compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
  return compiled.sub(rep, s)
				518
				519
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern string; also used as the cache key.
    s: string to scan.

  Returns:
    The first match object found anywhere in s, or None if there is none.
  """
  compiled = _regexp_compile_cache.get(pattern)
  if compiled is None:
    # re.compile is the public entry point; sre_compile is an internal module.
    compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
  return compiled.search(s)
				525
				526
				527	class _IncludeState(dict):
				528	"""Tracks line numbers for includes, and the order in which includes appear.
				529
				530	As a dict, an _IncludeState object serves as a mapping between include
				531	filename and line number on which that file was included.
				532
				533	Call CheckNextIncludeOrder() once for each header in the file, passing
				534	in the type constants defined above. Calls in an illegal order will
				535	raise an _IncludeError with an appropriate error message.
				536
				537	"""
				538	# self._section will move monotonically through this set. If it ever
				539	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				540	_INITIAL_SECTION = 0
				541	_MY_H_SECTION = 1
				542	_C_SECTION = 2
				543	_CPP_SECTION = 3
				544	_OTHER_H_SECTION = 4
				545
				546	_TYPE_NAMES = {
				547	_C_SYS_HEADER: 'C system header',
				548	_CPP_SYS_HEADER: 'C++ system header',
				549	_LIKELY_MY_HEADER: 'header this file implements',
				550	_POSSIBLE_MY_HEADER: 'header this file may implement',
				551	_OTHER_HEADER: 'other header',
				552	}
				553	_SECTION_NAMES = {
				554	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				555	_MY_H_SECTION: 'a header this file implements',
				556	_C_SECTION: 'C system header',
				557	_CPP_SECTION: 'C++ system header',
				558	_OTHER_H_SECTION: 'other header',
				559	}
				560
				561	def __init__(self):
				562	dict.__init__(self)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	563	self.ResetSection()
				564
				565	def ResetSection(self):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	566	# The name of the current section.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	567	self._section = self._INITIAL_SECTION
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	568	# The path of last found header.
				569	self._last_header = ''
				570
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	571	def SetLastHeader(self, header_path):
				572	self._last_header = header_path
				573
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	574	def CanonicalizeAlphabeticalOrder(self, header_path):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	575	"""Returns a path canonicalized for alphabetical comparison.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	576
				577	- replaces "-" with "_" so they both cmp the same.
				578	- removes '-inl' since we don't require them to be after the main header.
				579	- lowercase everything, just in case.
				580
				581	Args:
				582	header_path: Path to be canonicalized.
				583
				584	Returns:
				585	Canonicalized path.
				586	"""
				587	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				588
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	589	def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	590	"""Check if a header is in alphabetical order with the previous header.
				591
				592	Args:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	593	clean_lines: A CleansedLines instance containing the file.
				594	linenum: The number of the line to check.
				595	header_path: Canonicalized header to be checked.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	596
				597	Returns:
				598	Returns true if the header is in alphabetical order.
				599	"""
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	600	# If previous section is different from current section, _last_header will
				601	# be reset to empty string, so it's always less than current header.
				602	#
				603	# If previous line was a blank line, assume that the headers are
				604	# intentionally sorted the way they are.
				605	if (self._last_header > header_path and
				606	not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	607	return False
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	608	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	609
				610	def CheckNextIncludeOrder(self, header_type):
				611	"""Returns a non-empty error message if the next header is out of order.
				612
				613	This function also updates the internal state to be ready to check
				614	the next include.
				615
				616	Args:
				617	header_type: One of the _XXX_HEADER constants defined above.
				618
				619	Returns:
				620	The empty string if the header is in the right order, or an
				621	error message describing what's wrong.
				622
				623	"""
				624	error_message = ('Found %s after %s' %
				625	(self._TYPE_NAMES[header_type],
				626	self._SECTION_NAMES[self._section]))
				627
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	628	last_section = self._section
				629
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	630	if header_type == _C_SYS_HEADER:
				631	if self._section <= self._C_SECTION:
				632	self._section = self._C_SECTION
				633	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	634	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	635	return error_message
				636	elif header_type == _CPP_SYS_HEADER:
				637	if self._section <= self._CPP_SECTION:
				638	self._section = self._CPP_SECTION
				639	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	640	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	641	return error_message
				642	elif header_type == _LIKELY_MY_HEADER:
				643	if self._section <= self._MY_H_SECTION:
				644	self._section = self._MY_H_SECTION
				645	else:
				646	self._section = self._OTHER_H_SECTION
				647	elif header_type == _POSSIBLE_MY_HEADER:
				648	if self._section <= self._MY_H_SECTION:
				649	self._section = self._MY_H_SECTION
				650	else:
				651	# This will always be the fallback because we're not sure
				652	# enough that the header is associated with this file.
				653	self._section = self._OTHER_H_SECTION
				654	else:
				655	assert header_type == _OTHER_HEADER
				656	self._section = self._OTHER_H_SECTION
				657
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	658	if last_section != self._section:
				659	self._last_header = ''
				660
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	661	return ''
				662
				663
				664	class _CppLintState(object):
				665	"""Maintains module-wide state.."""
				666
				667	def __init__(self):
				668	self.verbose_level = 1 # global setting.
				669	self.error_count = 0 # global count of reported errors
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	670	# filters to apply when emitting error messages
				671	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	672	self.counting = 'total' # In what way are we counting errors?
				673	self.errors_by_category = {} # string to int dict storing error counts
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	674
				675	# output format:
				676	# "emacs" - format that emacs can parse (default)
				677	# "vs7" - format that Microsoft Visual Studio 7 can parse
				678	self.output_format = 'emacs'
				679
				680	def SetOutputFormat(self, output_format):
				681	"""Sets the output format for errors."""
				682	self.output_format = output_format
				683
				684	def SetVerboseLevel(self, level):
				685	"""Sets the module's verbosity, and returns the previous setting."""
				686	last_verbose_level = self.verbose_level
				687	self.verbose_level = level
				688	return last_verbose_level
				689
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	690	def SetCountingStyle(self, counting_style):
				691	"""Sets the module's counting options."""
				692	self.counting = counting_style
				693
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	694	def SetFilters(self, filters):
				695	"""Sets the error-message filters.
				696
				697	These filters are applied when deciding whether to emit a given
				698	error message.
				699
				700	Args:
				701	filters: A string of comma-separated filters (eg "+whitespace/indent").
				702	Each filter should start with + or -; else we die.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	703
				704	Raises:
				705	ValueError: The comma-separated filters did not all start with '+' or '-'.
				706	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	707	"""
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	708	# Default filters always have less priority than the flag ones.
				709	self.filters = _DEFAULT_FILTERS[:]
				710	for filt in filters.split(','):
				711	clean_filt = filt.strip()
				712	if clean_filt:
				713	self.filters.append(clean_filt)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	714	for filt in self.filters:
				715	if not (filt.startswith('+') or filt.startswith('-')):
				716	raise ValueError('Every filter in --filters must start with + or -'
				717	' (%s does not)' % filt)
				718
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	719	def ResetErrorCounts(self):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	720	"""Sets the module's error statistic back to zero."""
				721	self.error_count = 0
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	722	self.errors_by_category = {}
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	723
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	724	def IncrementErrorCount(self, category):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	725	"""Bumps the module's error statistic."""
				726	self.error_count += 1
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	727	if self.counting in ('toplevel', 'detailed'):
				728	if self.counting != 'detailed':
				729	category = category.split('/')[0]
				730	if category not in self.errors_by_category:
				731	self.errors_by_category[category] = 0
				732	self.errors_by_category[category] += 1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	733
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	734	def PrintErrorCounts(self):
				735	"""Print a summary of errors by category, and the total."""
				736	for category, count in self.errors_by_category.iteritems():
				737	sys.stderr.write('Category \'%s\' errors found: %d\n' %
				738	(category, count))
				739	sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	740
# Single module-level _CppLintState instance; the module-level accessor
# functions that follow read and update this object.
_cpplint_state = _CppLintState()
				742
				743
				744	def _OutputFormat():
				745	"""Gets the module's output format."""
				746	return _cpplint_state.output_format
				747
				748
				749	def _SetOutputFormat(output_format):
				750	"""Sets the module's output format."""
				751	_cpplint_state.SetOutputFormat(output_format)
				752
				753
				754	def _VerboseLevel():
				755	"""Returns the module's verbosity setting."""
				756	return _cpplint_state.verbose_level
				757
				758
				759	def _SetVerboseLevel(level):
				760	"""Sets the module's verbosity, and returns the previous setting."""
				761	return _cpplint_state.SetVerboseLevel(level)
				762
				763
def _SetCountingStyle(level):
  """Stores the error-counting style in the module state.

  Args:
    level: The counting style to hand to the module state.
  """
  state = _cpplint_state
  state.SetCountingStyle(level)
				767
				768
def _Filters():
  """Returns the list of output filters held by the module state."""
  state = _cpplint_state
  return state.filters
				772
				773
				774	def _SetFilters(filters):
				775	"""Sets the module's error-message filters.
				776
				777	These filters are applied when deciding whether to emit a given
				778	error message.
				779
				780	Args:
				781	filters: A string of comma-separated filters (eg "whitespace/indent").
				782	Each filter should start with + or -; else we die.
				783	"""
				784	_cpplint_state.SetFilters(filters)
				785
				786
				787	class _FunctionState(object):
				788	"""Tracks current function name and the number of lines in its body."""
				789
				790	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				791	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				792
				793	def __init__(self):
				794	self.in_a_function = False
				795	self.lines_in_function = 0
				796	self.current_function = ''
				797
				798	def Begin(self, function_name):
				799	"""Start analyzing function body.
				800
				801	Args:
				802	function_name: The name of the function being tracked.
				803	"""
				804	self.in_a_function = True
				805	self.lines_in_function = 0
				806	self.current_function = function_name
				807
				808	def Count(self):
				809	"""Count line in current function body."""
				810	if self.in_a_function:
				811	self.lines_in_function += 1
				812
				813	def Check(self, error, filename, linenum):
				814	"""Report if too many lines in function body.
				815
				816	Args:
				817	error: The function to call with any errors found.
				818	filename: The name of the current file.
				819	linenum: The number of the line to check.
				820	"""
				821	if Match(r'T(EST\|est)', self.current_function):
				822	base_trigger = self._TEST_TRIGGER
				823	else:
				824	base_trigger = self._NORMAL_TRIGGER
				825	trigger = base_trigger * 2**_VerboseLevel()
				826
				827	if self.lines_in_function > trigger:
				828	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				829	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				830	if error_level > 5:
				831	error_level = 5
				832	error(filename, linenum, 'readability/fn_size', error_level,
				833	'Small and focused functions are preferred:'
				834	' %s has %d non-comment lines'
				835	' (error triggered by exceeding %d lines).' % (
				836	self.current_function, self.lines_in_function, trigger))
				837
				838	def End(self):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	839	"""Stop analyzing function body."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	840	self.in_a_function = False
				841
				842
				843	class _IncludeError(Exception):
				844	"""Indicates a problem with the include order in a file."""
				845	pass
				846
				847
				848	class FileInfo:
				849	"""Provides utility functions for filenames.
				850
				851	FileInfo provides easy access to the components of a file's path
				852	relative to the project root.
				853	"""
				854
				855	def __init__(self, filename):
				856	self._filename = filename
				857
				858	def FullName(self):
				859	"""Make Windows paths like Unix."""
				860	return os.path.abspath(self._filename).replace('\\', '/')
				861
				862	def RepositoryName(self):
				863	"""FullName after removing the local path to the repository.
				864
				865	If we have a real absolute path name here we can try to do something smart:
				866	detecting the root of the checkout and truncating /path/to/checkout from
				867	the name so that we get header guards that don't include things like
				868	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				869	people on different computers who have checked the source out to different
				870	locations won't see bogus errors.
				871	"""
				872	fullname = self.FullName()
				873
				874	if os.path.exists(fullname):
				875	project_dir = os.path.dirname(fullname)
				876
				877	if os.path.exists(os.path.join(project_dir, ".svn")):
				878	# If there's a .svn file in the current directory, we recursively look
				879	# up the directory tree for the top of the SVN checkout
				880	root_dir = project_dir
				881	one_up_dir = os.path.dirname(root_dir)
				882	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				883	root_dir = os.path.dirname(root_dir)
				884	one_up_dir = os.path.dirname(one_up_dir)
				885
				886	prefix = os.path.commonprefix([root_dir, project_dir])
				887	return fullname[len(prefix) + 1:]
				888
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	889	# Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
				890	# searching up from the current path.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	891	root_dir = os.path.dirname(fullname)
				892	while (root_dir != os.path.dirname(root_dir) and
erg@google.com	5e16969	2010-01-28 20:17:01 +0000	[diff] [blame]	893	not os.path.exists(os.path.join(root_dir, ".git")) and
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	894	not os.path.exists(os.path.join(root_dir, ".hg")) and
				895	not os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	896	root_dir = os.path.dirname(root_dir)
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	897
				898	if (os.path.exists(os.path.join(root_dir, ".git")) or
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	899	os.path.exists(os.path.join(root_dir, ".hg")) or
				900	os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	901	prefix = os.path.commonprefix([root_dir, project_dir])
				902	return fullname[len(prefix) + 1:]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	903
				904	# Don't know what to do; header guard warnings may be wrong...
				905	return fullname
				906
				907	def Split(self):
				908	"""Splits the file into the directory, basename, and extension.
				909
				910	For 'chrome/browser/browser.cc', Split() would
				911	return ('chrome/browser', 'browser', '.cc')
				912
				913	Returns:
				914	A tuple of (directory, basename, extension).
				915	"""
				916
				917	googlename = self.RepositoryName()
				918	project, rest = os.path.split(googlename)
				919	return (project,) + os.path.splitext(rest)
				920
				921	def BaseName(self):
				922	"""File base name - text after the final slash, before the final period."""
				923	return self.Split()[1]
				924
				925	def Extension(self):
				926	"""File extension - text following the final period."""
				927	return self.Split()[2]
				928
				929	def NoExtension(self):
				930	"""File has no source file extension."""
				931	return '/'.join(self.Split()[0:2])
				932
				933	def IsSource(self):
				934	"""File has a source file extension."""
				935	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				936
				937
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of the given category should be printed.

  An error is dropped when a NOLINT comment suppresses it, when its
  confidence is below the current verbosity level, or when the last
  filter whose name is a prefix of the category is a '-' filter.

  Args:
    category: The category string of the error.
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  suppressed = False
  for rule in _Filters():
    if rule.startswith('-'):
      verdict = True
    elif rule.startswith('+'):
      verdict = False
    else:
      assert False  # should have been checked for in SetFilter.
      continue  # only reached when assertions are disabled
    # Later matching filters override earlier ones.
    if category.startswith(rule[1:]):
      suppressed = verdict

  return not suppressed
				963
				964
				965	def Error(filename, linenum, category, confidence, message):
				966	"""Logs the fact we've found a lint error.
				967
				968	We log where the error was found, and also our confidence in the error,
				969	that is, how certain we are this is a legitimate style regression, and
				970	not a misidentification or a use that's sometimes justified.
				971
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	972	False positives can be suppressed by the use of
				973	"cpplint(category)" comments on the offending line. These are
				974	parsed into _error_suppressions.
				975
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	976	Args:
				977	filename: The name of the file containing the error.
				978	linenum: The number of the line containing the error.
				979	category: A string used to describe the "category" this bug
				980	falls under: "whitespace", say, or "runtime". Categories
				981	may have a hierarchy separated by slashes: "whitespace/indent".
				982	confidence: A number from 1-5 representing a confidence score for
				983	the error, with 5 meaning that we are certain of the problem,
				984	and 1 meaning that it could be a legitimate construct.
				985	message: The error message.
				986	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	987	if _ShouldPrintError(category, confidence, linenum):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	988	_cpplint_state.IncrementErrorCount(category)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	989	if _cpplint_state.output_format == 'vs7':
				990	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				991	filename, linenum, message, category, confidence))
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	992	elif _cpplint_state.output_format == 'eclipse':
				993	sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
				994	filename, linenum, message, category, confidence))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	995	else:
				996	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				997	filename, linenum, message, category, confidence))
				998
				999
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1000	# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1001	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				1002	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				1003	# Matches strings. Escape codes should already be removed by ESCAPES.
				1004	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				1005	# Matches characters. Escape codes should already be removed by ESCAPES.
				1006	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				1007	# Matches multi-line C++ comments.
				1008	# This RE is a little bit more complicated than one might expect, because we
				1009	# have to take care of space removals tools so we can handle comments inside
				1010	# statements better.
				1011	# The current rule is: We only clear spaces from both sides when we're at the
				1012	# end of the line. Otherwise, we try to remove spaces from the right side,
				1013	# if this doesn't work we try on left side but only if there's a non-character
				1014	# on the right.
				1015	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				1016	r"""(\s/\.\/\s*$\|
				1017	/\.\*/\s+\|
				1018	\s+/\.\*/(?=\W)\|
				1019	/\.\*/)""", re.VERBOSE)
				1020
				1021
				1022	def IsCppString(line):
				1023	"""Does line terminate so, that the next symbol is in string constant.
				1024
				1025	This function does not consider single-line nor multi-line comments.
				1026
				1027	Args:
				1028	line: is a partial line of code starting from the 0..n.
				1029
				1030	Returns:
				1031	True, if next character appended to 'line' is inside a
				1032	string constant.
				1033	"""
				1034
				1035	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				1036	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				1037
				1038
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """

  # Closing sequence of the raw string we are currently inside, or None.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, append a blank line.
        line = ''

    else:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      # Groups: (1) text before the literal, (2) the optional user
      # delimiter, (3) everything after the opening parenthesis.
      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if matched:
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
				1098
				1099
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the line that opens the next multi-line comment.

  Args:
    lines: The list of lines to scan.
    lineix: The index to start scanning at.

  Returns:
    The index of the first line at or after lineix that begins with '/*'
    and does not close the comment on the same line, or len(lines) if
    there is no such line.
  """
  total = len(lines)
  while lineix < total:
    text = lines[lineix].strip()
    # Only return this marker if the comment goes beyond this line
    if text.startswith('/*') and '*/' not in text[2:]:
      return lineix
    lineix += 1
  return total
				1109
				1110
				1111	def FindNextMultiLineCommentEnd(lines, lineix):
				1112	"""We are inside a comment, find the end marker."""
				1113	while lineix < len(lines):
				1114	if lines[lineix].strip().endswith('*/'):
				1115	return lineix
				1116	lineix += 1
				1117	return len(lines)
				1118
				1119
				1120	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				1121	"""Clears a range of lines for multi-line comments."""
				1122	# Having // dummy comments makes the lines non-empty, so we will not get
				1123	# unnecessary blank line warnings later in the code.
				1124	for i in range(begin, end):
				1125	lines[i] = '// dummy'
				1126
				1127
				1128	def RemoveMultiLineComments(filename, lines, error):
				1129	"""Removes multiline (c-style) comments from lines."""
				1130	lineix = 0
				1131	while lineix < len(lines):
				1132	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				1133	if lineix_begin >= len(lines):
				1134	return
				1135	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				1136	if lineix_end >= len(lines):
				1137	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				1138	'Could not find end of multi-line comment')
				1139	return
				1140	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				1141	lineix = lineix_end + 1
				1142
				1143
				1144	def CleanseComments(line):
				1145	"""Removes //-comments and single-line C-style /* */ comments.
				1146
				1147	Args:
				1148	line: A line of C++ source.
				1149
				1150	Returns:
				1151	The line with single-line comments removed.
				1152	"""
				1153	commentpos = line.find('//')
				1154	if commentpos != -1 and not IsCppString(line[:commentpos]):
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	1155	line = line[:commentpos].rstrip()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1156	# get rid of /* ... */
				1157	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				1158
				1159
class CleansedLines(object):
  """Holds 4 copies of all lines with different preprocessing applied to them.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member is raw_lines after CleanseRawStrings.
  All these members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(line))
      # Collapse strings before stripping comments, so that text such as
      # "http://" inside a string is not mistaken for a comment.
      elided = self._CollapseStrings(line)
      self.elided.append(CleanseComments(elided))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching _RE_PATTERN_INCLUDE are returned unchanged.
    if not _RE_PATTERN_INCLUDE.match(elided):
      # Remove escaped characters first to make quote/single quote collapsing
      # basic. Things that look like escaped characters shouldn't occur
      # outside of strings and chars.
      elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
      elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
      elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    return elided
				1205
				1206
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than xrange() so the function runs on Python 2 and 3.
  for i in range(startpos, len(line)):
    if line[i] == startchar:
      depth += 1
    elif line[i] == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1229
				1230
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [ or <, finds the position that closes it.

  When the character at elided line linenum, column pos is one of
  '(' '{' '[' '<', this locates the linenum/pos just past the character
  that closes the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer past the closing brace, or
    (line, len(lines), -1) if we never find a close. Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  closer_for = {'(': ')', '[': ']', '{': '}', '<': '>'}

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in closer_for:
    return (line, clean_lines.NumLines(), -1)
  endchar = closer_for[startchar]

  # Check first line
  (end_pos, num_open) = FindEndOfExpressionInLine(
      line, pos, 0, startchar, endchar)

  # Continue scanning forward, carrying the open count from line to line.
  while end_pos == -1 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    (end_pos, num_open) = FindEndOfExpressionInLine(
        line, 0, num_open, startchar, endchar)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1275
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1276
				1277	def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
				1278	"""Find position at the matching startchar.
				1279
				1280	This is almost the reverse of FindEndOfExpressionInLine, but note
				1281	that the input position and returned position differs by 1.
				1282
				1283	Args:
				1284	line: a CleansedLines line.
				1285	endpos: start searching at this position.
				1286	depth: nesting level at endpos.
				1287	startchar: expression opening character.
				1288	endchar: expression closing character.
				1289
				1290	Returns:
				1291	On finding matching startchar: (index at matching startchar, 0)
				1292	Otherwise: (-1, new depth at beginning of this line)
				1293	"""
				1294	for i in xrange(endpos, -1, -1):
				1295	if line[i] == endchar:
				1296	depth += 1
				1297	elif line[i] == startchar:
				1298	depth -= 1
				1299	if depth == 0:
				1300	return (i, 0)
				1301	return (-1, depth)
				1302
				1303
				1304	def ReverseCloseExpression(clean_lines, linenum, pos):
				1305	"""If input points to ) or } or ] or >, finds the position that opens it.
				1306
				1307	If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
				1308	linenum/pos that correspond to the opening of the expression.
				1309
				1310	Args:
				1311	clean_lines: A CleansedLines instance containing the file.
				1312	linenum: The number of the line to check.
				1313	pos: A position on the line.
				1314
				1315	Returns:
				1316	A tuple (line, linenum, pos) pointer at the opening brace, or
				1317	(line, 0, -1) if we never find the matching opening brace. Note
				1318	we ignore strings and comments when matching; and the line we
				1319	return is the 'cleansed' line at linenum.
				1320	"""
				1321	line = clean_lines.elided[linenum]
				1322	endchar = line[pos]
				1323	if endchar not in ')}]>':
				1324	return (line, 0, -1)
				1325	if endchar == ')': startchar = '('
				1326	if endchar == ']': startchar = '['
				1327	if endchar == '}': startchar = '{'
				1328	if endchar == '>': startchar = '<'
				1329
				1330	# Check last line
				1331	(start_pos, num_open) = FindStartOfExpressionInLine(
				1332	line, pos, 0, startchar, endchar)
				1333	if start_pos > -1:
				1334	return (line, linenum, start_pos)
				1335
				1336	# Continue scanning backward
				1337	while linenum > 0:
				1338	linenum -= 1
				1339	line = clean_lines.elided[linenum]
				1340	(start_pos, num_open) = FindStartOfExpressionInLine(
				1341	line, len(line) - 1, num_open, startchar, endchar)
				1342	if start_pos > -1:
				1343	return (line, linenum, start_pos)
				1344
				1345	# Did not find startchar before beginning of file, give up
				1346	return (line, 0, -1)
				1347
				1348
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: A list of strings, each representing a line of the file. The
      entry at index 0 is skipped.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  if not any(re.search(r'Copyright', text, re.I) for text in lines[1:11]):
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found. '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
				1360
				1361
				1362	def GetHeaderGuardCPPVariable(filename):
				1363	"""Returns the CPP variable that should be used as a header guard.
				1364
				1365	Args:
				1366	filename: The name of a C++ header file.
				1367
				1368	Returns:
				1369	The CPP variable that should be used as a header guard in the
				1370	named file.
				1371
				1372	"""
				1373
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1374	# Restores original filename in case that cpplint is invoked from Emacs's
				1375	# flymake.
				1376	filename = re.sub(r'_flymake\.h$', '.h', filename)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1377	filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1378
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1379	fileinfo = FileInfo(filename)
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	1380	file_path_from_root = fileinfo.RepositoryName()
				1381	if _root:
				1382	file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
				1383	return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1384
				1385
				1386	def CheckForHeaderGuard(filename, lines, error):
				1387	"""Checks that the file contains a header guard.
				1388
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1389	Logs an error if no #ifndef header guard is present. For other
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1390	headers, checks that the full pathname is used.
				1391
				1392	Args:
				1393	filename: The name of the C++ header file.
				1394	lines: An array of strings, each representing a line of the file.
				1395	error: The function to call with any errors found.
				1396	"""
				1397
				1398	cppvar = GetHeaderGuardCPPVariable(filename)
				1399
				1400	ifndef = None
				1401	ifndef_linenum = 0
				1402	define = None
				1403	endif = None
				1404	endif_linenum = 0
				1405	for linenum, line in enumerate(lines):
				1406	linesplit = line.split()
				1407	if len(linesplit) >= 2:
				1408	# find the first occurrence of #ifndef and #define, save arg
				1409	if not ifndef and linesplit[0] == '#ifndef':
				1410	# set ifndef to the header guard presented on the #ifndef line.
				1411	ifndef = linesplit[1]
				1412	ifndef_linenum = linenum
				1413	if not define and linesplit[0] == '#define':
				1414	define = linesplit[1]
				1415	# find the last occurrence of #endif, save entire line
				1416	if line.startswith('#endif'):
				1417	endif = line
				1418	endif_linenum = linenum
				1419
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1420	if not ifndef:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1421	error(filename, 0, 'build/header_guard', 5,
				1422	'No #ifndef header guard found, suggested CPP variable is: %s' %
				1423	cppvar)
				1424	return
				1425
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1426	if not define:
				1427	error(filename, 0, 'build/header_guard', 5,
				1428	'No #define header guard found, suggested CPP variable is: %s' %
				1429	cppvar)
				1430	return
				1431
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1432	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				1433	# for backward compatibility.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1434	if ifndef != cppvar:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1435	error_level = 0
				1436	if ifndef != cppvar + '_':
				1437	error_level = 5
				1438
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1439	ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
				1440	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1441	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				1442	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				1443
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1444	if define != ifndef:
				1445	error(filename, 0, 'build/header_guard', 5,
				1446	'#ifndef and #define don\'t match, suggested CPP variable is: %s' %
				1447	cppvar)
				1448	return
				1449
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1450	if endif != ('#endif // %s' % cppvar):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1451	error_level = 0
				1452	if endif != ('#endif // %s' % (cppvar + '_')):
				1453	error_level = 5
				1454
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1455	ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
				1456	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1457	error(filename, endif_linenum, 'build/header_guard', error_level,
				1458	'#endif line should be "#endif // %s"' % cppvar)
				1459
				1460
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a character we consider bad.

  Two characters are looked for, in this order on each line:

  1. The Unicode replacement character.  Its presence means that either the
     file contained invalid UTF-8 (likely) or literal replacement characters
     (which it shouldn't).  Line numbering may be thrown off if the invalid
     UTF-8 occurred adjacent to a newline.

  2. The NUL byte, which is problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (offending character, error category, message), in reporting order.
  checks = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for lineno, text in enumerate(lines):
    for bad_char, category, message in checks:
      if bad_char in text:
        error(filename, lineno, category, 5, message)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1484
				1485
				1486	def CheckForNewlineAtEOF(filename, lines, error):
				1487	"""Logs an error if there is no newline char at the end of the file.
				1488
				1489	Args:
				1490	filename: The name of the current file.
				1491	lines: An array of strings, each representing a line of the file.
				1492	error: The function to call with any errors found.
				1493	"""
				1494
				1495	# The array lines() was created by adding two newlines to the
				1496	# original file (go figure), then splitting on \n.
				1497	# To verify that the file ends in \n, we just have to make sure the
				1498	# last-but-two element of lines() exists and is empty.
				1499	if len(lines) < 3 or lines[-2]:
				1500	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1501	'Could not find a newline character at the end of the file.')
				1502
				1503
				1504	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1505	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1506
				1507	/* ... */ comments are legit inside macros, for one line.
				1508	Otherwise, we prefer // comments, so it's ok to warn about the
				1509	other. Likewise, it's ok for strings to extend across multiple
				1510	lines, as long as a line continuation character (backslash)
				1511	terminates each line. Although not currently prohibited by the C++
				1512	style guide, it's ugly and unnecessary. We don't do well with either
				1513	in this lint program, so we warn about both.
				1514
				1515	Args:
				1516	filename: The name of the current file.
				1517	clean_lines: A CleansedLines instance containing the file.
				1518	linenum: The number of the line to check.
				1519	error: The function to call with any errors found.
				1520	"""
				1521	line = clean_lines.elided[linenum]
				1522
				1523	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1524	# second (escaped) slash may trigger later \" detection erroneously.
				1525	line = line.replace('\\\\', '')
				1526
				1527	if line.count('/') > line.count('/'):
				1528	error(filename, linenum, 'readability/multiline_comment', 5,
				1529	'Complex multi-line /.../-style comment found. '
				1530	'Lint may give bogus warnings. '
				1531	'Consider replacing these with //-style comments, '
				1532	'with #if 0...#endif, '
				1533	'or with more clearly structured multi-line comments.')
				1534
				1535	if (line.count('"') - line.count('\\"')) % 2:
				1536	error(filename, linenum, 'readability/multiline_string', 5,
				1537	'Multi-line string ("...") found. This lint script doesn\'t '
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1538	'do well with such strings, and may give bogus warnings. '
				1539	'Use C++11 raw strings or concatenation instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1540
				1541
				1542	threading_list = (
				1543	('asctime(', 'asctime_r('),
				1544	('ctime(', 'ctime_r('),
				1545	('getgrgid(', 'getgrgid_r('),
				1546	('getgrnam(', 'getgrnam_r('),
				1547	('getlogin(', 'getlogin_r('),
				1548	('getpwnam(', 'getpwnam_r('),
				1549	('getpwuid(', 'getpwuid_r('),
				1550	('gmtime(', 'gmtime_r('),
				1551	('localtime(', 'localtime_r('),
				1552	('rand(', 'rand_r('),
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1553	('strtok(', 'strtok_r('),
				1554	('ttyname(', 'ttyname_r('),
				1555	)
				1556
				1557
				1558	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1559	"""Checks for calls to thread-unsafe functions.
				1560
				1561	Much code has been originally written without consideration of
				1562	multi-threading. Also, engineers are relying on their old experience;
				1563	they have learned posix before threading extensions were added. These
				1564	tests guide the engineers to use thread-safe functions (when using
				1565	posix directly).
				1566
				1567	Args:
				1568	filename: The name of the current file.
				1569	clean_lines: A CleansedLines instance containing the file.
				1570	linenum: The number of the line to check.
				1571	error: The function to call with any errors found.
				1572	"""
				1573	line = clean_lines.elided[linenum]
				1574	for single_thread_function, multithread_safe_function in threading_list:
				1575	ix = line.find(single_thread_function)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1576	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1577	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1578	line[ix - 1] not in ('_', '.', '>'))):
				1579	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1580	'Consider using ' + multithread_safe_function +
				1581	'...) instead of ' + single_thread_function +
				1582	'...) for improved thread safety.')
				1583
				1584
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # The parentheses are escaped so they match the literal call syntax, and
  # the severity names are alternatives inside a single group.
  if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
    error(filename, linenum, 'runtime/vlog', 5,
          'VLOG() should be used with numeric verbosity level. '
          'Use LOG() if you want symbolic severity levels.')
				1602
				1603
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1604	# Matches invalid increment: *count++, which moves pointer instead of
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1605	# incrementing a value.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1606	_RE_PATTERN_INVALID_INCREMENT = re.compile(
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1607	r'^\s\\w+(\+\+\|--);')
				1608
				1609
				1610	def CheckInvalidIncrement(filename, clean_lines, linenum, error):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1611	"""Checks for invalid increment *count++.
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1612
				1613	For example following function:
				1614	void increment_counter(int* count) {
				1615	*count++;
				1616	}
				1617	is invalid, because it effectively does count++, moving pointer, and should
				1618	be replaced with ++count, (count)++ or *count += 1.
				1619
				1620	Args:
				1621	filename: The name of the current file.
				1622	clean_lines: A CleansedLines instance containing the file.
				1623	linenum: The number of the line to check.
				1624	error: The function to call with any errors found.
				1625	"""
				1626	line = clean_lines.elided[linenum]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	1627	if _RE_PATTERN_INVALID_INCREMENT.match(line):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	1628	error(filename, linenum, 'runtime/invalid_increment', 5,
				1629	'Changing pointer instead of value (or unused value of operator*).')
				1630
				1631
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1632	class _BlockInfo(object):
				1633	"""Stores information about a generic block of code."""
				1634
				1635	def __init__(self, seen_open_brace):
				1636	self.seen_open_brace = seen_open_brace
				1637	self.open_parentheses = 0
				1638	self.inline_asm = _NO_ASM
				1639
				1640	def CheckBegin(self, filename, clean_lines, linenum, error):
				1641	"""Run checks that applies to text up to the opening brace.
				1642
				1643	This is mostly for checking the text after the class identifier
				1644	and the "{", usually where the base class is specified. For other
				1645	blocks, there isn't much to check, so we always pass.
				1646
				1647	Args:
				1648	filename: The name of the current file.
				1649	clean_lines: A CleansedLines instance containing the file.
				1650	linenum: The number of the line to check.
				1651	error: The function to call with any errors found.
				1652	"""
				1653	pass
				1654
				1655	def CheckEnd(self, filename, clean_lines, linenum, error):
				1656	"""Run checks that applies to text after the closing brace.
				1657
				1658	This is mostly used for checking end of namespace comments.
				1659
				1660	Args:
				1661	filename: The name of the current file.
				1662	clean_lines: A CleansedLines instance containing the file.
				1663	linenum: The number of the line to check.
				1664	error: The function to call with any errors found.
				1665	"""
				1666	pass
				1667
				1668
				1669	class _ClassInfo(_BlockInfo):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1670	"""Stores information about a class."""
				1671
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1672	def __init__(self, name, class_or_struct, clean_lines, linenum):
				1673	_BlockInfo.__init__(self, False)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1674	self.name = name
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1675	self.starting_linenum = linenum
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1676	self.is_derived = False
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1677	if class_or_struct == 'struct':
				1678	self.access = 'public'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1679	self.is_struct = True
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1680	else:
				1681	self.access = 'private'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1682	self.is_struct = False
				1683
				1684	# Remember initial indentation level for this class. Using raw_lines here
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1685	# instead of elided to account for leading comments.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1686	initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
				1687	if initial_indent:
				1688	self.class_indent = len(initial_indent.group(1))
				1689	else:
				1690	self.class_indent = 0
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1691
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1692	# Try to find the end of the class. This will be confused by things like:
				1693	# class A {
				1694	# } *x = { ...
				1695	#
				1696	# But it's still good enough for CheckSectionSpacing.
				1697	self.last_line = 0
				1698	depth = 0
				1699	for i in range(linenum, clean_lines.NumLines()):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1700	line = clean_lines.elided[i]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1701	depth += line.count('{') - line.count('}')
				1702	if not depth:
				1703	self.last_line = i
				1704	break
				1705
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1706	def CheckBegin(self, filename, clean_lines, linenum, error):
				1707	# Look for a bare ':'
				1708	if Search('(^\|[^:]):($\|[^:])', clean_lines.elided[linenum]):
				1709	self.is_derived = True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1710
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1711	def CheckEnd(self, filename, clean_lines, linenum, error):
				1712	# Check that closing brace is aligned with beginning of the class.
				1713	# Only do this if the closing brace is indented by only whitespaces.
				1714	# This means we will not check single-line class definitions.
				1715	indent = Match(r'^( *)\}', clean_lines.elided[linenum])
				1716	if indent and len(indent.group(1)) != self.class_indent:
				1717	if self.is_struct:
				1718	parent = 'struct ' + self.name
				1719	else:
				1720	parent = 'class ' + self.name
				1721	error(filename, linenum, 'whitespace/indent', 3,
				1722	'Closing brace should be aligned with beginning of %s' % parent)
				1723
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1724
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records a namespace opened at linenum.

    Args:
      name: The namespace name, or None/empty for an anonymous namespace.
      linenum: The number of the line holding the declaration.
    """
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace.  Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines.  However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    if (linenum - self.starting_linenum < 10
        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                    r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
				1777
				1778
				1779	class _PreprocessorInfo(object):
				1780	"""Stores checkpoints of nesting stacks when #if/#else is seen."""
				1781
				1782	def __init__(self, stack_before_if):
				1783	# The entire nesting stack before #if
				1784	self.stack_before_if = stack_before_if
				1785
				1786	# The entire nesting stack up to #else
				1787	self.stack_before_else = []
				1788
				1789	# Whether we have already seen #else or #elif
				1790	self.seen_else = False
				1791
				1792
				1793	class _NestingState(object):
				1794	"""Holds states related to parsing braces."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1795
				1796	def __init__(self):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1797	# Stack for tracking all braces. An object is pushed whenever we
				1798	# see a "{", and popped when we see a "}". Only 3 types of
				1799	# objects are possible:
				1800	# - _ClassInfo: a class or struct.
				1801	# - _NamespaceInfo: a namespace.
				1802	# - _BlockInfo: some other type of block.
				1803	self.stack = []
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1804
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1805	# Stack of _PreprocessorInfo objects.
				1806	self.pp_stack = []
				1807
				1808	def SeenOpenBrace(self):
				1809	"""Check if we have seen the opening brace for the innermost block.
				1810
				1811	Returns:
				1812	True if we have seen the opening brace, False if the innermost
				1813	block is still expecting an opening brace.
				1814	"""
				1815	return (not self.stack) or self.stack[-1].seen_open_brace
				1816
				1817	def InNamespaceBody(self):
				1818	"""Check if we are currently one level inside a namespace body.
				1819
				1820	Returns:
				1821	True if top of the stack is a namespace block, False otherwise.
				1822	"""
				1823	return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
				1824
				1825	def UpdatePreprocessor(self, line):
				1826	"""Update preprocessor stack.
				1827
				1828	We need to handle preprocessors due to classes like this:
				1829	#ifdef SWIG
				1830	struct ResultDetailsPageElementExtensionPoint {
				1831	#else
				1832	struct ResultDetailsPageElementExtensionPoint : public Extension {
				1833	#endif
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1834
				1835	We make the following assumptions (good enough for most files):
				1836	- Preprocessor condition evaluates to true from #if up to first
				1837	#else/#elif/#endif.
				1838
				1839	- Preprocessor condition evaluates to false from #else/#elif up
				1840	to #endif. We still perform lint checks on these lines, but
				1841	these do not affect nesting stack.
				1842
				1843	Args:
				1844	line: current line to check.
				1845	"""
				1846	if Match(r'^\s#\s(if\|ifdef\|ifndef)\b', line):
				1847	# Beginning of #if block, save the nesting stack here. The saved
				1848	# stack will allow us to restore the parsing state in the #else case.
				1849	self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
				1850	elif Match(r'^\s#\s(else\|elif)\b', line):
				1851	# Beginning of #else block
				1852	if self.pp_stack:
				1853	if not self.pp_stack[-1].seen_else:
				1854	# This is the first #else or #elif block. Remember the
				1855	# whole nesting stack up to this point. This is what we
				1856	# keep after the #endif.
				1857	self.pp_stack[-1].seen_else = True
				1858	self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
				1859
				1860	# Restore the stack to how it was before the #if
				1861	self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
				1862	else:
				1863	# TODO(unknown): unexpected #else, issue warning?
				1864	pass
				1865	elif Match(r'^\s#\sendif\b', line):
				1866	# End of #if or #else blocks.
				1867	if self.pp_stack:
				1868	# If we saw an #else, we will need to restore the nesting
				1869	# stack to its former state before the #else, otherwise we
				1870	# will just continue from where we left off.
				1871	if self.pp_stack[-1].seen_else:
				1872	# Here we can just use a shallow copy since we are the last
				1873	# reference to it.
				1874	self.stack = self.pp_stack[-1].stack_before_else
				1875	# Drop the corresponding #if
				1876	self.pp_stack.pop()
				1877	else:
				1878	# TODO(unknown): unexpected #endif, issue warning?
				1879	pass
				1880
				1881	def Update(self, filename, clean_lines, linenum, error):
				1882	"""Update nesting state with current line.
				1883
				1884	Args:
				1885	filename: The name of the current file.
				1886	clean_lines: A CleansedLines instance containing the file.
				1887	linenum: The number of the line to check.
				1888	error: The function to call with any errors found.
				1889	"""
				1890	line = clean_lines.elided[linenum]
				1891
				1892	# Update pp_stack first
				1893	self.UpdatePreprocessor(line)
				1894
				1895	# Count parentheses. This is to avoid adding struct arguments to
				1896	# the nesting stack.
				1897	if self.stack:
				1898	inner_block = self.stack[-1]
				1899	depth_change = line.count('(') - line.count(')')
				1900	inner_block.open_parentheses += depth_change
				1901
				1902	# Also check if we are starting or ending an inline assembly block.
				1903	if inner_block.inline_asm in (_NO_ASM, _END_ASM):
				1904	if (depth_change != 0 and
				1905	inner_block.open_parentheses == 1 and
				1906	_MATCH_ASM.match(line)):
				1907	# Enter assembly block
				1908	inner_block.inline_asm = _INSIDE_ASM
				1909	else:
				1910	# Not entering assembly block. If previous line was _END_ASM,
				1911	# we will now shift to _NO_ASM state.
				1912	inner_block.inline_asm = _NO_ASM
				1913	elif (inner_block.inline_asm == _INSIDE_ASM and
				1914	inner_block.open_parentheses == 0):
				1915	# Exit assembly block
				1916	inner_block.inline_asm = _END_ASM
				1917
				1918	# Consume namespace declaration at the beginning of the line. Do
				1919	# this in a loop so that we catch same line declarations like this:
				1920	# namespace proto2 { namespace bridge { class MessageSet; } }
				1921	while True:
				1922	# Match start of namespace. The "\b\s*" below catches namespace
				1923	# declarations even if it weren't followed by a whitespace, this
				1924	# is so that we don't confuse our namespace checker. The
				1925	# missing spaces will be flagged by CheckSpacing.
				1926	namespace_decl_match = Match(r'^\snamespace\b\s([:\w]+)?(.*)$', line)
				1927	if not namespace_decl_match:
				1928	break
				1929
				1930	new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
				1931	self.stack.append(new_namespace)
				1932
				1933	line = namespace_decl_match.group(2)
				1934	if line.find('{') != -1:
				1935	new_namespace.seen_open_brace = True
				1936	line = line[line.find('{') + 1:]
				1937
				1938	# Look for a class declaration in whatever is left of the line
				1939	# after parsing namespaces. The regexp accounts for decorated classes
				1940	# such as in:
				1941	# class LOCKABLE API Object {
				1942	# };
				1943	#
				1944	# Templates with class arguments may confuse the parser, for example:
				1945	# template <class T
				1946	# class Comparator = less<T>,
				1947	# class Vector = vector<T> >
				1948	# class HeapQueue {
				1949	#
				1950	# Because this parser has no nesting state about templates, by the
				1951	# time it saw "class Comparator", it may think that it's a new class.
				1952	# Nested templates have a similar problem:
				1953	# template <
				1954	# typename ExportedType,
				1955	# typename TupleType,
				1956	# template <typename, typename> class ImplTemplate>
				1957	#
				1958	# To avoid these cases, we ignore classes that are followed by '=' or '>'
				1959	class_decl_match = Match(
				1960	r'\s(template\s<[\w\s<>,:]>\s)?'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1961	r'(class\|struct)\s+([A-Z_]+\s+)(\w+(?:::\w+))'
				1962	r'(([^=>]\|<[^<>]>\|<[^<>]<[^<>]>\s>)*)$', line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1963	if (class_decl_match and
				1964	(not self.stack or self.stack[-1].open_parentheses == 0)):
				1965	self.stack.append(_ClassInfo(
				1966	class_decl_match.group(4), class_decl_match.group(2),
				1967	clean_lines, linenum))
				1968	line = class_decl_match.group(5)
				1969
				1970	# If we have not yet seen the opening brace for the innermost block,
				1971	# run checks here.
				1972	if not self.SeenOpenBrace():
				1973	self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
				1974
				1975	# Update access control if we are inside a class/struct
				1976	if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1977	classinfo = self.stack[-1]
				1978	access_match = Match(
				1979	r'^(.)\b(public\|private\|protected\|signals)(\s+(?:slots\s)?)?'
				1980	r':(?:[^:]\|$)',
				1981	line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1982	if access_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1983	classinfo.access = access_match.group(2)
				1984
				1985	# Check that access keywords are indented +1 space. Skip this
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1986	# check if the keywords are not preceded by whitespaces.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1987	indent = access_match.group(1)
				1988	if (len(indent) != classinfo.class_indent + 1 and
				1989	Match(r'^\s*$', indent)):
				1990	if classinfo.is_struct:
				1991	parent = 'struct ' + classinfo.name
				1992	else:
				1993	parent = 'class ' + classinfo.name
				1994	slots = ''
				1995	if access_match.group(3):
				1996	slots = access_match.group(3)
				1997	error(filename, linenum, 'whitespace/indent', 3,
				1998	'%s%s: should be indented +1 space inside %s' % (
				1999	access_match.group(2), slots, parent))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2000
				2001	# Consume braces or semicolons from what's left of the line
				2002	while True:
				2003	# Match first brace, semicolon, or closed parenthesis.
				2004	matched = Match(r'^[^{;)}]([{;)}])(.)$', line)
				2005	if not matched:
				2006	break
				2007
				2008	token = matched.group(1)
				2009	if token == '{':
				2010	# If namespace or class hasn't seen a opening brace yet, mark
				2011	# namespace/class head as complete. Push a new block onto the
				2012	# stack otherwise.
				2013	if not self.SeenOpenBrace():
				2014	self.stack[-1].seen_open_brace = True
				2015	else:
				2016	self.stack.append(_BlockInfo(True))
				2017	if _MATCH_ASM.match(line):
				2018	self.stack[-1].inline_asm = _BLOCK_ASM
				2019	elif token == ';' or token == ')':
				2020	# If we haven't seen an opening brace yet, but we already saw
				2021	# a semicolon, this is probably a forward declaration. Pop
				2022	# the stack for these.
				2023	#
				2024	# Similarly, if we haven't seen an opening brace yet, but we
				2025	# already saw a closing parenthesis, then these are probably
				2026	# function arguments with extra "class" or "struct" keywords.
				2027	# Also pop these stack for these.
				2028	if not self.SeenOpenBrace():
				2029	self.stack.pop()
				2030	else: # token == '}'
				2031	# Perform end of block checks and pop the stack.
				2032	if self.stack:
				2033	self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
				2034	self.stack.pop()
				2035	line = matched.group(2)
				2036
				2037	def InnermostClass(self):
				2038	"""Get class info on the top of the stack.
				2039
				2040	Returns:
				2041	A _ClassInfo object if we are inside a class, or None otherwise.
				2042	"""
				2043	for i in range(len(self.stack), 0, -1):
				2044	classinfo = self.stack[i - 1]
				2045	if isinstance(classinfo, _ClassInfo):
				2046	return classinfo
				2047	return None
				2048
def CheckCompletedBlocks(self, filename, error):
  """Checks that all classes and namespaces have been completely parsed.

  Call this when all lines in a file have been processed.  Any class or
  namespace entry still left on the block stack is reported at the line
  where it started.

  Args:
    filename: The name of the current file.
    error: The function to call with any errors found.
  """
  # Note: This test can result in false positives if #ifdef constructs
  # get in the way of brace matching. See the testBuildClass test in
  # cpplint_unittest.py for an example of this.
  for obj in self.stack:
    if isinstance(obj, _ClassInfo):
      error(filename, obj.starting_linenum, 'build/class', 5,
            'Failed to find complete declaration of class %s' %
            obj.name)
    elif isinstance(obj, _NamespaceInfo):
      error(filename, obj.starting_linenum, 'build/namespaces', 5,
            'Failed to find complete declaration of namespace %s' %
            obj.name)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2069
				2070
				2071	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2072	nesting_state, error):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2073	r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2074
				2075	Complain about several constructs which gcc-2 accepts, but which are
				2076	not standard C++. Warning about these in lint is one way to ease the
				2077	transition to new compilers.
				2078	- put storage class first (e.g. "static const" instead of "const static").
				2079	- "%lld" instead of %qd" in printf-type functions.
				2080	- "%1$d" is non-standard in printf-type functions.
				2081	- "\%" is an undefined character escape sequence.
				2082	- text after #endif is not allowed.
				2083	- invalid inner-style forward declaration.
				2084	- >? and <? operators, and their >?= and <?= cousins.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2085
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2086	Additionally, check for constructor/destructor style violations and reference
				2087	members, as it is very convenient to do so while checking for
				2088	gcc-2 compliance.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2089
				2090	Args:
				2091	filename: The name of the current file.
				2092	clean_lines: A CleansedLines instance containing the file.
				2093	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2094	nesting_state: A _NestingState instance which maintains information about
				2095	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2096	error: A callable to which errors are reported, which takes 4 arguments:
				2097	filename, line number, error level, and message
				2098	"""
				2099
				2100	# Remove comments from the line, but leave in strings for now.
				2101	line = clean_lines.lines[linenum]
				2102
				2103	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				2104	error(filename, linenum, 'runtime/printf_format', 3,
				2105	'%q in format strings is deprecated. Use %ll instead.')
				2106
				2107	if Search(r'printf\s\(.".*%\d+\$', line):
				2108	error(filename, linenum, 'runtime/printf_format', 2,
				2109	'%N$ formats are unconventional. Try rewriting to avoid them.')
				2110
				2111	# Remove escaped backslashes before looking for undefined escapes.
				2112	line = line.replace('\\\\', '')
				2113
				2114	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				2115	error(filename, linenum, 'build/printf_format', 3,
				2116	'%, [, (, and { are undefined character escapes. Unescape them.')
				2117
				2118	# For the rest, work with both comments and strings removed.
				2119	line = clean_lines.elided[linenum]
				2120
				2121	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				2122	r'\|float\|double\|signed\|unsigned'
				2123	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2124	r'\s+(register\|static\|extern\|typedef)\b',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2125	line):
				2126	error(filename, linenum, 'build/storage_class', 5,
				2127	'Storage class (static, extern, typedef, etc) should be first.')
				2128
				2129	if Match(r'\s#\sendif\s*[^/\s]+', line):
				2130	error(filename, linenum, 'build/endif_comment', 5,
				2131	'Uncommented text after #endif is non-standard. Use a comment.')
				2132
				2133	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				2134	error(filename, linenum, 'build/forward_decl', 5,
				2135	'Inner-style forward declarations are invalid. Remove this line.')
				2136
				2137	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				2138	line):
				2139	error(filename, linenum, 'build/deprecated', 3,
				2140	'>? and <? (max and min) operators are non-standard and deprecated.')
				2141
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2142	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				2143	# TODO(unknown): Could it be expanded safely to arbitrary references,
				2144	# without triggering too many false positives? The first
				2145	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				2146	# the restriction.
				2147	# Here's the original regexp, for the reference:
				2148	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				2149	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				2150	error(filename, linenum, 'runtime/member_string_references', 2,
				2151	'const string& members are dangerous. It is much better to use '
				2152	'alternatives, such as pointers or simple constants.')
				2153
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2154	# Everything else in this function operates on class declarations.
				2155	# Return early if the top of the nesting stack is not a class, or if
				2156	# the class head is not completed yet.
				2157	classinfo = nesting_state.InnermostClass()
				2158	if not classinfo or not classinfo.seen_open_brace:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2159	return
				2160
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2161	# The class may have been declared with namespace or classname qualifiers.
				2162	# The constructor and destructor will not have those qualifiers.
				2163	base_classname = classinfo.name.split('::')[-1]
				2164
				2165	# Look for single-argument constructors that aren't marked explicit.
				2166	# Technically a valid construct, but against style.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2167	args = Match(r'\s+(?:inline\s+)?%s\s*$([^,()]+)$'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2168	% re.escape(base_classname),
				2169	line)
				2170	if (args and
				2171	args.group(1) != 'void' and
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	2172	not Match(r'(const\s+)?%s(\s+const)?\s(?:<\w+>\s)?&'
				2173	% re.escape(base_classname), args.group(1).strip())):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2174	error(filename, linenum, 'runtime/explicit', 5,
				2175	'Single-argument constructors should be marked explicit.')
				2176
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2177
				2178	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				2179	"""Checks for the correctness of various spacing around function calls.
				2180
				2181	Args:
				2182	filename: The name of the current file.
				2183	line: The text of the line to check.
				2184	linenum: The number of the line to check.
				2185	error: The function to call with any errors found.
				2186	"""
				2187
				2188	# Since function calls often occur inside if/for/while/switch
				2189	# expressions - which have their own, more liberal conventions - we
				2190	# first see if we should be looking inside such an expression for a
				2191	# function call, to which we can apply more strict standards.
				2192	fncall = line # if there's no control flow construct, look at whole line
				2193	for pattern in (r'\bif\s$(.)$\s*{',
				2194	r'\bfor\s$(.)$\s*{',
				2195	r'\bwhile\s$(.)$\s*[{;]',
				2196	r'\bswitch\s$(.)$\s*{'):
				2197	match = Search(pattern, line)
				2198	if match:
				2199	fncall = match.group(1) # look inside the parens for function calls
				2200	break
				2201
				2202	# Except in if/for/while/switch, there should never be space
				2203	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				2204	# for nested parens ( (a+b) + c ). Likewise, there should never be
				2205	# a space before a ( when it's a function argument. I assume it's a
				2206	# function argument when the char before the whitespace is legal in
				2207	# a function name (alnum + _) and we're not starting a macro. Also ignore
				2208	# pointers and references to arrays and functions coz they're too tricky:
				2209	# we use a very simple way to recognize these:
				2210	# " (something)(maybe-something)" or
				2211	# " (something)(maybe-something," or
				2212	# " (something)[something]"
				2213	# Note that we assume the contents of [] to be short enough that
				2214	# they'll never need to wrap.
				2215	if ( # Ignore control structures.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2216	not Search(r'\b(if\|for\|while\|switch\|return\|new\|delete\|catch\|sizeof)\b',
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2217	fncall) and
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2218	# Ignore pointers/references to functions.
				2219	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				2220	# Ignore pointers/references to arrays.
				2221	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2222	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2223	error(filename, linenum, 'whitespace/parens', 4,
				2224	'Extra space after ( in function call')
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2225	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2226	error(filename, linenum, 'whitespace/parens', 2,
				2227	'Extra space after (')
				2228	if (Search(r'\w\s+\(', fncall) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2229	not Search(r'#\s*define\|typedef', fncall) and
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2230	not Search(r'\w\s+$(\w+::)\\w+$\(', fncall)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2231	error(filename, linenum, 'whitespace/parens', 4,
				2232	'Extra space before ( in function call')
				2233	# If the ) is followed only by a newline or a { + newline, assume it's
				2234	# part of a control statement (if/while/etc), and don't complain
				2235	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2236	# If the closing parenthesis is preceded by only whitespaces,
				2237	# try to give a more descriptive error message.
				2238	if Search(r'^\s+\)', fncall):
				2239	error(filename, linenum, 'whitespace/parens', 2,
				2240	'Closing ) should be moved to the previous line')
				2241	else:
				2242	error(filename, linenum, 'whitespace/parens', 2,
				2243	'Extra space before )')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2244
				2245
				2246	def IsBlankLine(line):
				2247	"""Returns true if the given line is blank.
				2248
				2249	We consider a line to be blank if the line is empty or consists of
				2250	only white spaces.
				2251
				2252	Args:
				2253	line: A line of a string.
				2254
				2255	Returns:
				2256	True, if the given line is blank.
				2257	"""
				2258	return not line or line.isspace()
				2259
				2260
				2261	def CheckForFunctionLengths(filename, clean_lines, linenum,
				2262	function_state, error):
				2263	"""Reports for long function bodies.
				2264
				2265	For an overview why this is done, see:
				2266	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				2267
				2268	Uses a simplistic algorithm assuming other style guidelines
				2269	(especially spacing) are followed.
				2270	Only checks unindented functions, so class members are unchecked.
				2271	Trivial bodies are unchecked, so constructors with huge initializer lists
				2272	may be missed.
				2273	Blank/comment lines are not counted so as to avoid encouraging the removal
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2274	of vertical space and comments just to get through a lint check.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2275	NOLINT on the last line of a function disables this check.
				2276
				2277	Args:
				2278	filename: The name of the current file.
				2279	clean_lines: A CleansedLines instance containing the file.
				2280	linenum: The number of the line to check.
				2281	function_state: Current function name and lines in body so far.
				2282	error: The function to call with any errors found.
				2283	"""
				2284	lines = clean_lines.lines
				2285	line = lines[linenum]
				2286	raw = clean_lines.raw_lines
				2287	raw_line = raw[linenum]
				2288	joined_line = ''
				2289
				2290	starting_func = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2291	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2292	match_result = Match(regexp, line)
				2293	if match_result:
				2294	# If the name is all caps and underscores, figure it's a macro and
				2295	# ignore it, unless it's TEST or TEST_F.
				2296	function_name = match_result.group(1).split()[-1]
				2297	if function_name == 'TEST' or function_name == 'TEST_F' or (
				2298	not Match(r'[A-Z_]+$', function_name)):
				2299	starting_func = True
				2300
				2301	if starting_func:
				2302	body_found = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2303	for start_linenum in xrange(linenum, clean_lines.NumLines()):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2304	start_line = lines[start_linenum]
				2305	joined_line += ' ' + start_line.lstrip()
				2306	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				2307	body_found = True
				2308	break # ... ignore
				2309	elif Search(r'{', start_line):
				2310	body_found = True
				2311	function = Search(r'((\w\|:)*)\(', line).group(1)
				2312	if Match(r'TEST', function): # Handle TEST... macros
				2313	parameter_regexp = Search(r'($.*$)', joined_line)
				2314	if parameter_regexp: # Ignore bad syntax
				2315	function += parameter_regexp.group(1)
				2316	else:
				2317	function += '()'
				2318	function_state.Begin(function)
				2319	break
				2320	if not body_found:
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2321	# No body for the function (or evidence of a non-function) was found.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2322	error(filename, linenum, 'readability/fn_size', 5,
				2323	'Lint failed to find start of function body.')
				2324	elif Match(r'^\}\s*$', line): # function end
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2325	function_state.Check(error, filename, linenum)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2326	function_state.End()
				2327	elif not Match(r'^\s*$', line):
				2328	function_state.Count() # Count non-blank/non-comment lines.
				2329
				2330
				2331	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				2332
				2333
				2334	def CheckComment(comment, filename, linenum, error):
				2335	"""Checks for common mistakes in TODO comments.
				2336
				2337	Args:
				2338	comment: The text of the comment from the line in question.
				2339	filename: The name of the current file.
				2340	linenum: The number of the line to check.
				2341	error: The function to call with any errors found.
				2342	"""
				2343	match = _RE_PATTERN_TODO.match(comment)
				2344	if match:
				2345	# One whitespace is correct; zero whitespace is handled elsewhere.
				2346	leading_whitespace = match.group(1)
				2347	if len(leading_whitespace) > 1:
				2348	error(filename, linenum, 'whitespace/todo', 2,
				2349	'Too many spaces before TODO')
				2350
				2351	username = match.group(2)
				2352	if not username:
				2353	error(filename, linenum, 'readability/todo', 2,
				2354	'Missing username in TODO; it should look like '
				2355	'"// TODO(my_username): Stuff."')
				2356
				2357	middle_whitespace = match.group(3)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2358	# Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2359	if middle_whitespace != ' ' and middle_whitespace != '':
				2360	error(filename, linenum, 'whitespace/todo', 2,
				2361	'TODO(my_username) should be followed by a space')
				2362
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Reports an error when one of the DISALLOW* macros starts the line while
  the innermost block is a class whose current access is not 'private'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return
  if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
    if nesting_state.stack[-1].access != 'private':
      error(filename, linenum, 'readability/constructors', 3,
            '%s must be in the private: section' % matched.group(1))
  # Otherwise: found DISALLOW* macro outside a class declaration, or perhaps
  # it was used inside a function when it should have been part of the
  # class declaration. We could issue a warning here, but it
  # probably resulted in a compiler error already.
				2392
				2393
				2394	def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
				2395	"""Find the corresponding > to close a template.
				2396
				2397	Args:
				2398	clean_lines: A CleansedLines instance containing the file.
				2399	linenum: Current line number.
				2400	init_suffix: Remainder of the current line after the initial <.
				2401
				2402	Returns:
				2403	True if a matching bracket exists.
				2404	"""
				2405	line = init_suffix
				2406	nesting_stack = ['<']
				2407	while True:
				2408	# Find the next operator that can tell us whether < is used as an
				2409	# opening bracket or as a less-than operator. We only want to
				2410	# warn on the latter case.
				2411	#
				2412	# We could also check all other operators and terminate the search
				2413	# early, e.g. if we got something like this "a<b+c", the "<" is
				2414	# most likely a less-than operator, but then we will get false
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2415	# positives for default arguments and other template expressions.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2416	match = Search(r'^[^<>(),;\[\]]([<>(),;\[\]])(.)$', line)
				2417	if match:
				2418	# Found an operator, update nesting stack
				2419	operator = match.group(1)
				2420	line = match.group(2)
				2421
				2422	if nesting_stack[-1] == '<':
				2423	# Expecting closing angle bracket
				2424	if operator in ('<', '(', '['):
				2425	nesting_stack.append(operator)
				2426	elif operator == '>':
				2427	nesting_stack.pop()
				2428	if not nesting_stack:
				2429	# Found matching angle bracket
				2430	return True
				2431	elif operator == ',':
				2432	# Got a comma after a bracket, this is most likely a template
				2433	# argument. We have not seen a closing angle bracket yet, but
				2434	# it's probably a few lines later if we look for it, so just
				2435	# return early here.
				2436	return True
				2437	else:
				2438	# Got some other operator.
				2439	return False
				2440
				2441	else:
				2442	# Expecting closing parenthesis or closing bracket
				2443	if operator in ('<', '(', '['):
				2444	nesting_stack.append(operator)
				2445	elif operator in (')', ']'):
				2446	# We don't bother checking for matching () or []. If we got
				2447	# something like (] or [), it would have been a syntax error.
				2448	nesting_stack.pop()
				2449
				2450	else:
				2451	# Scan the next line
				2452	linenum += 1
				2453	if linenum >= len(clean_lines.elided):
				2454	break
				2455	line = clean_lines.elided[linenum]
				2456
				2457	# Exhausted all remaining lines and still no matching angle bracket.
				2458	# Most likely the input was incomplete, otherwise we should have
				2459	# seen a semicolon and returned early.
				2460	return True
				2461
				2462
				2463	def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
				2464	"""Find the corresponding < that started a template.
				2465
				2466	Args:
				2467	clean_lines: A CleansedLines instance containing the file.
				2468	linenum: Current line number.
				2469	init_prefix: Part of the current line before the initial >.
				2470
				2471	Returns:
				2472	True if a matching bracket exists.
				2473	"""
				2474	line = init_prefix
				2475	nesting_stack = ['>']
				2476	while True:
				2477	# Find the previous operator
				2478	match = Search(r'^(.)([<>(),;\[\]])[^<>(),;\[\]]$', line)
				2479	if match:
				2480	# Found an operator, update nesting stack
				2481	operator = match.group(2)
				2482	line = match.group(1)
				2483
				2484	if nesting_stack[-1] == '>':
				2485	# Expecting opening angle bracket
				2486	if operator in ('>', ')', ']'):
				2487	nesting_stack.append(operator)
				2488	elif operator == '<':
				2489	nesting_stack.pop()
				2490	if not nesting_stack:
				2491	# Found matching angle bracket
				2492	return True
				2493	elif operator == ',':
				2494	# Got a comma before a bracket, this is most likely a
				2495	# template argument. The opening angle bracket is probably
				2496	# there if we look for it, so just return early here.
				2497	return True
				2498	else:
				2499	# Got some other operator.
				2500	return False
				2501
				2502	else:
				2503	# Expecting opening parenthesis or opening bracket
				2504	if operator in ('>', ')', ']'):
				2505	nesting_stack.append(operator)
				2506	elif operator in ('(', '['):
				2507	nesting_stack.pop()
				2508
				2509	else:
				2510	# Scan the previous line
				2511	linenum -= 1
				2512	if linenum < 0:
				2513	break
				2514	line = clean_lines.elided[linenum]
				2515
				2516	# Exhausted all earlier lines and still no matching angle bracket.
				2517	return False
				2518
				2519
				2520	def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2521	"""Checks for the correctness of various spacing issues in the code.
				2522
				2523	Things we check for: spaces around operators, spaces after
				2524	if/for/while/switch, no spaces around parens in function calls, two
				2525	spaces between code and comment, don't start a block with a blank
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2526	line, don't end a function with a blank line, don't add a blank line
				2527	after public/protected/private, don't have too many blank lines in a row.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2528
				2529	Args:
				2530	filename: The name of the current file.
				2531	clean_lines: A CleansedLines instance containing the file.
				2532	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2533	nesting_state: A _NestingState instance which maintains information about
				2534	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2535	error: The function to call with any errors found.
				2536	"""
				2537
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2538	# Don't use "elided" lines here, otherwise we can't check commented lines.
				2539	# Don't want to use "raw" either, because we don't want to check inside C++11
				2540	# raw strings,
				2541	raw = clean_lines.lines_without_raw_strings
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2542	line = raw[linenum]
				2543
				2544	# Before nixing comments, check if the line is blank for no good
				2545	# reason. This includes the first line after a block is opened, and
				2546	# blank lines at the end of a function (ie, right before a line like '}'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2547	#
				2548	# Skip all the blank line checks if we are immediately inside a
				2549	# namespace body. In other words, don't issue blank line warnings
				2550	# for this block:
				2551	# namespace {
				2552	#
				2553	# }
				2554	#
				2555	# A warning about missing end of namespace comments will be issued instead.
				2556	if IsBlankLine(line) and not nesting_state.InNamespaceBody():
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2557	elided = clean_lines.elided
				2558	prev_line = elided[linenum - 1]
				2559	prevbrace = prev_line.rfind('{')
				2560	# TODO(unknown): Don't complain if line before blank line, and line after,
				2561	# both start with alnums and are indented the same amount.
				2562	# This ignores whitespace at the start of a namespace block
				2563	# because those are not usually indented.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2564	if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2565	# OK, we have a blank line at the start of a code block. Before we
				2566	# complain, we check if it is an exception to the rule: The previous
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2567	# non-empty line has the parameters of a function header that are indented
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2568	# 4 spaces (because they did not fit in a 80 column line when placed on
				2569	# the same line as the function name). We also check for the case where
				2570	# the previous line is indented 6 spaces, which may happen when the
				2571	# initializers of a constructor do not fit into a 80 column line.
				2572	exception = False
				2573	if Match(r' {6}\w', prev_line): # Initializer list?
				2574	# We are looking for the opening column of initializer list, which
				2575	# should be indented 4 spaces to cause 6 space indentation afterwards.
				2576	search_position = linenum-2
				2577	while (search_position >= 0
				2578	and Match(r' {6}\w', elided[search_position])):
				2579	search_position -= 1
				2580	exception = (search_position >= 0
				2581	and elided[search_position][:5] == ' :')
				2582	else:
				2583	# Search for the function arguments or an initializer list. We use a
				2584	# simple heuristic here: If the line is indented 4 spaces; and we have a
				2585	# closing paren, without the opening paren, followed by an opening brace
				2586	# or colon (for initializer lists) we assume that it is the last line of
				2587	# a function header. If we have a colon indented 4 spaces, it is an
				2588	# initializer list.
				2589	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				2590	prev_line)
				2591	or Match(r' {4}:', prev_line))
				2592
				2593	if not exception:
				2594	error(filename, linenum, 'whitespace/blank_line', 2,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2595	'Redundant blank line at the start of a code block '
				2596	'should be deleted.')
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2597	# Ignore blank lines at the end of a block in a long if-else
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2598	# chain, like this:
				2599	# if (condition1) {
				2600	# // Something followed by a blank line
				2601	#
				2602	# } else if (condition2) {
				2603	# // Something else
				2604	# }
				2605	if linenum + 1 < clean_lines.NumLines():
				2606	next_line = raw[linenum + 1]
				2607	if (next_line
				2608	and Match(r'\s*}', next_line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2609	and next_line.find('} else ') == -1):
				2610	error(filename, linenum, 'whitespace/blank_line', 3,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2611	'Redundant blank line at the end of a code block '
				2612	'should be deleted.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2613
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2614	matched = Match(r'\s*(public\|protected\|private):', prev_line)
				2615	if matched:
				2616	error(filename, linenum, 'whitespace/blank_line', 3,
				2617	'Do not leave a blank line after "%s:"' % matched.group(1))
				2618
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2619	# Next, we complain if there's a comment too near the text
				2620	commentpos = line.find('//')
				2621	if commentpos != -1:
				2622	# Check if the // may be in quotes. If so, ignore it
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2623	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2624	if (line.count('"', 0, commentpos) -
				2625	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				2626	# Allow one space for new scopes, two spaces otherwise:
				2627	if (not Match(r'^\s*{ //', line) and
				2628	((commentpos >= 1 and
				2629	line[commentpos-1] not in string.whitespace) or
				2630	(commentpos >= 2 and
				2631	line[commentpos-2] not in string.whitespace))):
				2632	error(filename, linenum, 'whitespace/comments', 2,
				2633	'At least two spaces is best between code and comments')
				2634	# There should always be a space between the // and the comment
				2635	commentend = commentpos + 2
				2636	if commentend < len(line) and not line[commentend] == ' ':
				2637	# but some lines are exceptions -- e.g. if they're big
				2638	# comment delimiters like:
				2639	# //----------------------------------------------------------
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2640	# or are an empty C++ style Doxygen comment, like:
				2641	# ///
erg@google.com	6d8d983	2013-10-31 19:46:18 +0000	[diff] [blame]	2642	# or C++ style Doxygen comments placed after the variable:
				2643	# ///< Header comment
				2644	# //!< Header comment
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2645	# or they begin with multiple slashes followed by a space:
				2646	# //////// Header comment
				2647	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2648	Search(r'^/$', line[commentend:]) or
erg@google.com	6d8d983	2013-10-31 19:46:18 +0000	[diff] [blame]	2649	Search(r'^!< ', line[commentend:]) or
				2650	Search(r'^/< ', line[commentend:]) or
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2651	Search(r'^/+ ', line[commentend:]))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2652	if not match:
				2653	error(filename, linenum, 'whitespace/comments', 4,
				2654	'Should have a space between // and comment')
				2655	CheckComment(line[commentpos:], filename, linenum, error)
				2656
				2657	line = clean_lines.elided[linenum] # get rid of comments and strings
				2658
				2659	# Don't try to do spacing checks for operator methods
				2660	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				2661
				2662	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				2663	# Otherwise not. Note we only check for non-spaces on both sides;
				2664	# sometimes people put non-spaces on one side when aligning ='s among
				2665	# many lines (not that this is behavior that I approve of...)
				2666	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				2667	error(filename, linenum, 'whitespace/operators', 4,
				2668	'Missing spaces around =')
				2669
				2670	# It's ok not to have spaces around binary operators like + - * /, but if
				2671	# there's too little whitespace, we get concerned. It's hard to tell,
				2672	# though, so we punt on this one for now. TODO.
				2673
				2674	# You should always have whitespace around binary operators.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2675	#
				2676	# Check <= and >= first to avoid false positives with < and >, then
				2677	# check non-include lines for spacing around < and >.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2678	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2679	if match:
				2680	error(filename, linenum, 'whitespace/operators', 3,
				2681	'Missing spaces around %s' % match.group(1))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2682	# We allow no-spaces around << when used like this: 10<<20, but
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2683	# not otherwise (particularly, not when used as streams)
erg@google.com	0075d14	2013-11-05 22:28:07 +0000	[diff] [blame^]	2684	# Also ignore using ns::operator<<;
				2685	match = Search(r'(operator\|\S)(?:L\|UL\|ULL\|l\|ul\|ull)?<<(\S)', line)
				2686	if (match and
				2687	not (match.group(1).isdigit() and match.group(2).isdigit()) and
				2688	not (match.group(1) == 'operator' and match.group(2) == ';')):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2689	error(filename, linenum, 'whitespace/operators', 3,
				2690	'Missing spaces around <<')
				2691	elif not Match(r'#.*include', line):
				2692	# Avoid false positives on ->
				2693	reduced_line = line.replace('->', '')
				2694
				2695	# Look for < that is not surrounded by spaces. This is only
				2696	# triggered if both sides are missing spaces, even though
				2697	# technically should should flag if at least one side is missing a
				2698	# space. This is done to avoid some false positives with shifts.
				2699	match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
				2700	if (match and
				2701	not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
				2702	error(filename, linenum, 'whitespace/operators', 3,
				2703	'Missing spaces around <')
				2704
				2705	# Look for > that is not surrounded by spaces. Similar to the
				2706	# above, we only trigger if both sides are missing spaces to avoid
				2707	# false positives with shifts.
				2708	match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
				2709	if (match and
				2710	not FindPreviousMatchingAngleBracket(clean_lines, linenum,
				2711	match.group(1))):
				2712	error(filename, linenum, 'whitespace/operators', 3,
				2713	'Missing spaces around >')
				2714
				2715	# We allow no-spaces around >> for almost anything. This is because
				2716	# C++11 allows ">>" to close nested templates, which accounts for
				2717	# most cases when ">>" is not followed by a space.
				2718	#
				2719	# We still warn on ">>" followed by alpha character, because that is
				2720	# likely due to ">>" being used for right shifts, e.g.:
				2721	# value >> alpha
				2722	#
				2723	# When ">>" is used to close templates, the alphanumeric letter that
				2724	# follows would be part of an identifier, and there should still be
				2725	# a space separating the template type and the identifier.
				2726	# type<type<type>> alpha
				2727	match = Search(r'>>[a-zA-Z_]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2728	if match:
				2729	error(filename, linenum, 'whitespace/operators', 3,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2730	'Missing spaces around >>')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2731
				2732	# There shouldn't be space around unary operators
				2733	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				2734	if match:
				2735	error(filename, linenum, 'whitespace/operators', 4,
				2736	'Extra space for operator %s' % match.group(1))
				2737
				2738	# A pet peeve of mine: no spaces after an if, while, switch, or for
				2739	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				2740	if match:
				2741	error(filename, linenum, 'whitespace/parens', 5,
				2742	'Missing space before ( in %s' % match.group(1))
				2743
				2744	# For if/for/while/switch, the left and right parens should be
				2745	# consistent about how many spaces are inside the parens, and
				2746	# there should either be zero or one spaces inside the parens.
				2747	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2748	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2749	match = Search(r'\b(if\|for\|while\|switch)\s*'
				2750	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				2751	line)
				2752	if match:
				2753	if len(match.group(2)) != len(match.group(4)):
				2754	if not (match.group(3) == ';' and
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2755	len(match.group(2)) == 1 + len(match.group(4)) or
				2756	not match.group(2) and Search(r'\bfor\s$.; $', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2757	error(filename, linenum, 'whitespace/parens', 5,
				2758	'Mismatching spaces inside () in %s' % match.group(1))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2759	if len(match.group(2)) not in [0, 1]:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2760	error(filename, linenum, 'whitespace/parens', 5,
				2761	'Should have zero or one spaces inside ( and ) in %s' %
				2762	match.group(1))
				2763
				2764	# You should always have a space after a comma (either as fn arg or operator)
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2765	#
				2766	# This does not apply when the non-space character following the
				2767	# comma is another comma, since the only time when that happens is
				2768	# for empty macro arguments.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2769	#
				2770	# We run this check in two passes: first pass on elided lines to
				2771	# verify that lines contain missing whitespaces, second pass on raw
				2772	# lines to confirm that those missing whitespaces are not due to
				2773	# elided comments.
				2774	if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2775	error(filename, linenum, 'whitespace/comma', 3,
				2776	'Missing space after ,')
				2777
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	2778	# You should always have a space after a semicolon
				2779	# except for few corner cases
				2780	# TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
				2781	# space after ;
				2782	if Search(r';[^\s};\\)/]', line):
				2783	error(filename, linenum, 'whitespace/semicolon', 3,
				2784	'Missing space after ;')
				2785
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2786	# Next we will look for issues with function calls.
				2787	CheckSpacingForFunctionCall(filename, line, linenum, error)
				2788
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2789	# Except after an opening paren, or after another opening brace (in case of
				2790	# an initializer list, for instance), you should have spaces before your
				2791	# braces. And since you should never have braces at the beginning of a line,
				2792	# this is an easy test.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2793	match = Match(r'^(.*[^ ({]){', line)
				2794	if match:
				2795	# Try a bit harder to check for brace initialization. This
				2796	# happens in one of the following forms:
				2797	# Constructor() : initializer_list_{} { ... }
				2798	# Constructor{}.MemberFunction()
				2799	# Type variable{};
				2800	# FunctionCall(type{}, ...);
				2801	# LastArgument(..., type{});
				2802	# LOG(INFO) << type{} << " ...";
				2803	# map_of_type[{...}] = ...;
				2804	#
				2805	# We check for the character following the closing brace, and
				2806	# silence the warning if it's one of those listed above, i.e.
				2807	# "{.;,)<]".
				2808	#
				2809	# To account for nested initializer list, we allow any number of
				2810	# closing braces up to "{;,)<". We can't simply silence the
				2811	# warning on first sight of closing brace, because that would
				2812	# cause false negatives for things that are not initializer lists.
				2813	# Silence this: But not this:
				2814	# Outer{ if (...) {
				2815	# Inner{...} if (...){ // Missing space before {
				2816	# }; }
				2817	#
				2818	# There is a false negative with this approach if people inserted
				2819	# spurious semicolons, e.g. "if (cond){};", but we will catch the
				2820	# spurious semicolon with a separate check.
				2821	(endline, endlinenum, endpos) = CloseExpression(
				2822	clean_lines, linenum, len(match.group(1)))
				2823	trailing_text = ''
				2824	if endpos > -1:
				2825	trailing_text = endline[endpos:]
				2826	for offset in xrange(endlinenum + 1,
				2827	min(endlinenum + 3, clean_lines.NumLines() - 1)):
				2828	trailing_text += clean_lines.elided[offset]
				2829	if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
				2830	error(filename, linenum, 'whitespace/braces', 5,
				2831	'Missing space before {')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2832
				2833	# Make sure '} else {' has spaces.
				2834	if Search(r'}else', line):
				2835	error(filename, linenum, 'whitespace/braces', 5,
				2836	'Missing space before else')
				2837
				2838	# You shouldn't have spaces before your brackets, except maybe after
				2839	# 'delete []' or 'new char * []'.
				2840	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				2841	error(filename, linenum, 'whitespace/braces', 5,
				2842	'Extra space before [')
				2843
				2844	# You shouldn't have a space before a semicolon at the end of the line.
				2845	# There's a special case for "for" since the style guide allows space before
				2846	# the semicolon there.
				2847	if Search(r':\s;\s$', line):
				2848	error(filename, linenum, 'whitespace/semicolon', 5,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2849	'Semicolon defining empty statement. Use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2850	elif Search(r'^\s;\s$', line):
				2851	error(filename, linenum, 'whitespace/semicolon', 5,
				2852	'Line contains only semicolon. If this should be an empty statement, '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2853	'use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2854	elif (Search(r'\s+;\s*$', line) and
				2855	not Search(r'\bfor\b', line)):
				2856	error(filename, linenum, 'whitespace/semicolon', 5,
				2857	'Extra space before last semicolon. If this should be an empty '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2858	'statement, use {} instead.')
				2859
				2860	# In range-based for, we wanted spaces before and after the colon, but
				2861	# not around "::" tokens that might appear.
				2862	if (Search('for \(.[^:]:[^: ]', line) or
				2863	Search('for \(.[^: ]:[^:]', line)):
				2864	error(filename, linenum, 'whitespace/forcolon', 2,
				2865	'Missing space around colon in range-based for loop')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2866
				2867
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is that an access specifier
  (public/protected/private) is preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.starting_linenum <= 24 or
      linenum <= class_info.starting_linenum):
    return

  # The alternation must use a bare '|': an escaped '\|' would only match a
  # literal pipe character and the check would never trigger.
  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not matched:
    return

  # Issue warning if the line before public/protected/private was
  # not a blank line, but don't do this if the previous line contains
  # "class" or "struct".  This can happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # Also ignores cases where the previous line ends with a backslash as can be
  # common when defining classes in C macros.
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  end_class_head = class_info.starting_linenum
  for i in range(class_info.starting_linenum, linenum):
    if Search(r'\{\s*$', clean_lines.lines[i]):
      end_class_head = i
      break
  # Only complain when the specifier is not the first line after the
  # class head.
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
				2921
				2922
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line located above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to search above.

  Returns:
    A tuple with two elements.  The first element is the contents (taken
    from clean_lines.elided) of the last non-blank line before the current
    line, or the empty string if this is the first non-blank line.  The
    second is the line number of that line, or -1 if this is the first
    non-blank line.
  """
  candidate = linenum
  # Step upwards one line at a time until the top of the file is reached.
  while candidate > 0:
    candidate -= 1
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  # Nothing but blank lines (or nothing at all) above linenum.
  return ('', -1)
				2944
				2945
				2946	def CheckBraces(filename, clean_lines, linenum, error):
				2947	"""Looks for misplaced braces (e.g. at the end of line).
				2948
				2949	Args:
				2950	filename: The name of the current file.
				2951	clean_lines: A CleansedLines instance containing the file.
				2952	linenum: The number of the line to check.
				2953	error: The function to call with any errors found.
				2954	"""
				2955
				2956	line = clean_lines.elided[linenum] # get rid of comments and strings
				2957
				2958	if Match(r'\s{\s$', line):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2959	# We allow an open brace to start a line in the case where someone is using
				2960	# braces in a block to explicitly create a new scope, which is commonly used
				2961	# to control the lifetime of stack-allocated variables. Braces are also
				2962	# used for brace initializers inside function calls. We don't detect this
				2963	# perfectly: we just don't complain if the last non-whitespace character on
				2964	# the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2965	# previous line starts a preprocessor block.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2966	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2967	if (not Search(r'[,;:}{(]\s*$', prevline) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2968	not Match(r'\s*#', prevline)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2969	error(filename, linenum, 'whitespace/braces', 4,
				2970	'{ should almost always be at the end of the previous line')
				2971
				2972	# An else clause should be on the same line as the preceding closing brace.
				2973	if Match(r'\selse\s', line):
				2974	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2975	if Match(r'\s}\s$', prevline):
				2976	error(filename, linenum, 'whitespace/newline', 4,
				2977	'An else should appear on the same line as the preceding }')
				2978
				2979	# If braces come on one side of an else, they should be on both.
				2980	# However, we have to worry about "else if" that spans multiple lines!
				2981	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				2982	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				2983	# find the ( after the if
				2984	pos = line.find('else if')
				2985	pos = line.find('(', pos)
				2986	if pos > 0:
				2987	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				2988	if endline[endpos:].find('{') == -1: # must be brace after if
				2989	error(filename, linenum, 'readability/braces', 5,
				2990	'If an else has a brace on one side, it should have it on both')
				2991	else: # common case: else not followed by a multi-line if
				2992	error(filename, linenum, 'readability/braces', 5,
				2993	'If an else has a brace on one side, it should have it on both')
				2994
				2995	# Likewise, an else should never have the else clause on the same line
				2996	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				2997	error(filename, linenum, 'whitespace/newline', 4,
				2998	'Else clause should never be on same line as else (use 2 lines)')
				2999
				3000	# In the same way, a do/while should never be on one line
				3001	if Match(r'\s*do [^\s{]', line):
				3002	error(filename, linenum, 'whitespace/newline', 4,
				3003	'do/while clauses should not be on a single line')
				3004
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3005	# Block bodies should not be followed by a semicolon. Due to C++11
				3006	# brace initialization, there are more places where semicolons are
				3007	# required than not, so we use a whitelist approach to check these
				3008	# rather than a blacklist. These are the places where "};" should
				3009	# be replaced by just "}":
				3010	# 1. Some flavor of block following closing parenthesis:
				3011	# for (;;) {};
				3012	# while (...) {};
				3013	# switch (...) {};
				3014	# Function(...) {};
				3015	# if (...) {};
				3016	# if (...) else if (...) {};
				3017	#
				3018	# 2. else block:
				3019	# if (...) else {};
				3020	#
				3021	# 3. const member function:
				3022	# Function(...) const {};
				3023	#
				3024	# 4. Block following some statement:
				3025	# x = 42;
				3026	# {};
				3027	#
				3028	# 5. Block at the beginning of a function:
				3029	# Function(...) {
				3030	# {};
				3031	# }
				3032	#
				3033	# Note that naively checking for the preceding "{" will also match
				3034	# braces inside multi-dimensional arrays, but this is fine since
				3035	# that expression will not contain semicolons.
				3036	#
				3037	# 6. Block following another block:
				3038	# while (true) {}
				3039	# {};
				3040	#
				3041	# 7. End of namespaces:
				3042	# namespace {};
				3043	#
				3044	# These semicolons seems far more common than other kinds of
				3045	# redundant semicolons, possibly due to people converting classes
				3046	# to namespaces. For now we do not warn for this case.
				3047	#
				3048	# Try matching case 1 first.
				3049	match = Match(r'^(.\)\s)\{', line)
				3050	if match:
				3051	# Matched closing parenthesis (case 1). Check the token before the
				3052	# matching opening parenthesis, and don't warn if it looks like a
				3053	# macro. This avoids these false positives:
				3054	# - macro that defines a base class
				3055	# - multi-line macro that defines a base class
				3056	# - macro that defines the whole class-head
				3057	#
				3058	# But we still issue warnings for macros that we know are safe to
				3059	# warn, specifically:
				3060	# - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
				3061	# - TYPED_TEST
				3062	# - INTERFACE_DEF
				3063	# - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
				3064	#
				3065	# We implement a whitelist of safe macros instead of a blacklist of
				3066	# unsafe macros, even though the latter appears less frequently in
				3067	# google code and would have been easier to implement. This is because
				3068	# the downside for getting the whitelist wrong means some extra
				3069	# semicolons, while the downside for getting the blacklist wrong
				3070	# would result in compile errors.
				3071	#
				3072	# In addition to macros, we also don't want to warn on compound
				3073	# literals.
				3074	closing_brace_pos = match.group(1).rfind(')')
				3075	opening_parenthesis = ReverseCloseExpression(
				3076	clean_lines, linenum, closing_brace_pos)
				3077	if opening_parenthesis[2] > -1:
				3078	line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
				3079	macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
				3080	if ((macro and
				3081	macro.group(1) not in (
				3082	'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
				3083	'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
				3084	'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
				3085	Search(r'\s+=\s*$', line_prefix)):
				3086	match = None
				3087
				3088	else:
				3089	# Try matching cases 2-3.
				3090	match = Match(r'^(.(?:else\|\)\sconst)\s*)\{', line)
				3091	if not match:
				3092	# Try matching cases 4-6. These are always matched on separate lines.
				3093	#
				3094	# Note that we can't simply concatenate the previous line to the
				3095	# current line and do a single match, otherwise we may output
				3096	# duplicate warnings for the blank line case:
				3097	# if (cond) {
				3098	# // blank line
				3099	# }
				3100	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				3101	if prevline and Search(r'[;{}]\s*$', prevline):
				3102	match = Match(r'^(\s*)\{', line)
				3103
				3104	# Check matching closing brace
				3105	if match:
				3106	(endline, endlinenum, endpos) = CloseExpression(
				3107	clean_lines, linenum, len(match.group(1)))
				3108	if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
				3109	# Current {} pair is eligible for semicolon check, and we have found
				3110	# the redundant semicolon, output warning here.
				3111	#
				3112	# Note: because we are scanning forward for opening braces, and
				3113	# outputting warnings for the matching closing brace, if there are
				3114	# nested blocks with trailing semicolons, we will get the error
				3115	# messages in reversed order.
				3116	error(filename, endlinenum, 'readability/braces', 4,
				3117	"You don't need a ; after a }")
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3118
				3119
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Search for loop keywords at the beginning of the line.  Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return

  # Find the end of the conditional expression
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Output warning if what follows the condition expression is a semicolon.
  # No warning for all other cases, including whitespace or newline, since we
  # have a separate check for semicolons preceded by whitespace.
  if end_pos >= 0 and Match(r';', end_line[end_pos:]):
    if matched.group(1) == 'if':
      error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
            'Empty conditional bodies should use {}')
    else:
      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
            'Empty loop bodies should use {} or continue')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3153
				3154
				3155	def CheckCheck(filename, clean_lines, linenum, error):
				3156	"""Checks the use of CHECK and EXPECT macros.
				3157
				3158	Args:
				3159	filename: The name of the current file.
				3160	clean_lines: A CleansedLines instance containing the file.
				3161	linenum: The number of the line to check.
				3162	error: The function to call with any errors found.
				3163	"""
				3164
				3165	# Decide the set of replacement macros that should be suggested
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3166	lines = clean_lines.elided
				3167	check_macro = None
				3168	start_pos = -1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3169	for macro in _CHECK_MACROS:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3170	i = lines[linenum].find(macro)
				3171	if i >= 0:
				3172	check_macro = macro
				3173
				3174	# Find opening parenthesis. Do a regular expression match here
				3175	# to make sure that we are matching the expected CHECK macro, as
				3176	# opposed to some other macro that happens to contain the CHECK
				3177	# substring.
				3178	matched = Match(r'^(.\b' + check_macro + r'\s)\(', lines[linenum])
				3179	if not matched:
				3180	continue
				3181	start_pos = len(matched.group(1))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3182	break
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3183	if not check_macro or start_pos < 0:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3184	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				3185	return
				3186
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3187	# Find end of the boolean expression by matching parentheses
				3188	(last_line, end_line, end_pos) = CloseExpression(
				3189	clean_lines, linenum, start_pos)
				3190	if end_pos < 0:
				3191	return
				3192	if linenum == end_line:
				3193	expression = lines[linenum][start_pos + 1:end_pos - 1]
				3194	else:
				3195	expression = lines[linenum][start_pos + 1:]
				3196	for i in xrange(linenum + 1, end_line):
				3197	expression += lines[i]
				3198	expression += last_line[0:end_pos - 1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3199
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3200	# Parse expression so that we can take parentheses into account.
				3201	# This avoids false positives for inputs like "CHECK((a < 4) == b)",
				3202	# which is not replaceable by CHECK_LE.
				3203	lhs = ''
				3204	rhs = ''
				3205	operator = None
				3206	while expression:
				3207	matched = Match(r'^\s(<<\|<<=\|>>\|>>=\|->\\|->\|&&\|\\|\\|\|'
				3208	r'==\|!=\|>=\|>\|<=\|<\|\()(.*)$', expression)
				3209	if matched:
				3210	token = matched.group(1)
				3211	if token == '(':
				3212	# Parenthesized operand
				3213	expression = matched.group(2)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3214	(end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3215	if end < 0:
				3216	return # Unmatched parenthesis
				3217	lhs += '(' + expression[0:end]
				3218	expression = expression[end:]
				3219	elif token in ('&&', '\|\|'):
				3220	# Logical and/or operators. This means the expression
				3221	# contains more than one term, for example:
				3222	# CHECK(42 < a && a < b);
				3223	#
				3224	# These are not replaceable with CHECK_LE, so bail out early.
				3225	return
				3226	elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
				3227	# Non-relational operator
				3228	lhs += token
				3229	expression = matched.group(2)
				3230	else:
				3231	# Relational operator
				3232	operator = token
				3233	rhs = matched.group(2)
				3234	break
				3235	else:
				3236	# Unparenthesized operand. Instead of appending to lhs one character
				3237	# at a time, we do another regular expression match to consume several
				3238	# characters at once if possible. Trivial benchmark shows that this
				3239	# is more efficient when the operands are longer than a single
				3240	# character, which is generally the case.
				3241	matched = Match(r'^([^-=!<>()&\|]+)(.*)$', expression)
				3242	if not matched:
				3243	matched = Match(r'^(\s\S)(.)$', expression)
				3244	if not matched:
				3245	break
				3246	lhs += matched.group(1)
				3247	expression = matched.group(2)
				3248
				3249	# Only apply checks if we got all parts of the boolean expression
				3250	if not (lhs and operator and rhs):
				3251	return
				3252
				3253	# Check that rhs do not contain logical operators. We already know
				3254	# that lhs is fine since the loop above parses out && and \|\|.
				3255	if rhs.find('&&') > -1 or rhs.find('\|\|') > -1:
				3256	return
				3257
				3258	# At least one of the operands must be a constant literal. This is
				3259	# to avoid suggesting replacements for unprintable things like
				3260	# CHECK(variable != iterator)
				3261	#
				3262	# The following pattern matches decimal, hex integers, strings, and
				3263	# characters (in that order).
				3264	lhs = lhs.strip()
				3265	rhs = rhs.strip()
				3266	match_constant = r'^([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')$'
				3267	if Match(match_constant, lhs) or Match(match_constant, rhs):
				3268	# Note: since we know both lhs and rhs, we can provide a more
				3269	# descriptive error message like:
				3270	# Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
				3271	# Instead of:
				3272	# Consider using CHECK_EQ instead of CHECK(a == b)
				3273	#
				3274	# We are still keeping the less descriptive message because if lhs
				3275	# or rhs gets long, the error message might become unreadable.
				3276	error(filename, linenum, 'readability/check', 2,
				3277	'Consider using %s instead of %s(a %s b)' % (
				3278	_CHECK_REPLACEMENT[check_macro][operator],
				3279	check_macro, operator))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3280
				3281
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Reports one 'readability/alt_tokens' error for every match of
  _ALT_TOKEN_REPLACEMENT_PATTERN on the line, suggesting the operator
  that _ALT_TOKEN_REPLACEMENT maps the matched token to.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives.  At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # Only the comment delimiters themselves are tested: bailing out on any
  # '/' would also skip every line containing a division.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
				3312
				3313
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-Unicode (byte) strings
    are measured with len().
  """
  # 'unicode' only exists on Python 2.  On Python 3 every str is Unicode
  # text, so fall back to str instead of raising NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that a base character followed by a combining mark
  # is composed into a single code point where possible.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters that remain add no width.
      width += 1
  return width
				3334
				3335
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section
  # labels such as " public:".  The pattern must tolerate any amount of
  # whitespace around the label (including none before the colon).
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # Accept both one and two spaces before the trailing "// GUARD" comment
    # on the #endif line.
    if line.startswith(('#ifndef %s' % cppvar,
                        '#define %s' % cppvar,
                        '#endif // %s' % cppvar,
                        '#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.  The URL exemption applies to comment lines with any
  # indentation and any text between the "//" and the URL.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 0,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3441
				3442
				3443	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				3444	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				3445	# Matches the first component of a filename delimited by -s and _s. That is:
				3446	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				3447	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				3448	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				3449	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				3450	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				3451
				3452
				3453	def _DropCommonSuffixes(filename):
				3454	"""Drops common suffixes like _test.cc or -inl.h from filename.
				3455
				3456	For example:
				3457	>>> _DropCommonSuffixes('foo/foo-inl.h')
				3458	'foo/foo'
				3459	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				3460	'foo/bar/foo'
				3461	>>> _DropCommonSuffixes('foo/foo_internal.h')
				3462	'foo/foo'
				3463	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				3464	'foo/foo_unusualinternal'
				3465
				3466	Args:
				3467	filename: The input filename.
				3468
				3469	Returns:
				3470	The filename with the common suffix removed.
				3471	"""
				3472	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				3473	'inl.h', 'impl.h', 'internal.h'):
				3474	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				3475	filename[-len(suffix) - 1] in ('-', '_')):
				3476	return filename[:-len(suffix) - 1]
				3477	return os.path.splitext(filename)[0]
				3478
				3479
				3480	def _IsTestFilename(filename):
				3481	"""Determines if the given filename has a suffix that identifies it as a test.
				3482
				3483	Args:
				3484	filename: The input filename.
				3485
				3486	Returns:
				3487	True if 'filename' looks like a test, False otherwise.
				3488	"""
				3489	if (filename.endswith('_test.cc') or
				3490	filename.endswith('_unittest.cc') or
				3491	filename.endswith('_regtest.cc')):
				3492	return True
				3493	else:
				3494	return False
				3495
				3496
				3497	def _ClassifyInclude(fileinfo, include, is_system):
				3498	"""Figures out what kind of header 'include' is.
				3499
				3500	Args:
				3501	fileinfo: The current file cpplint is running over. A FileInfo instance.
				3502	include: The path to a #included file.
				3503	is_system: True if the #include used <> rather than "".
				3504
				3505	Returns:
				3506	One of the _XXX_HEADER constants.
				3507
				3508	For example:
				3509	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				3510	_C_SYS_HEADER
				3511	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				3512	_CPP_SYS_HEADER
				3513	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				3514	_LIKELY_MY_HEADER
				3515	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				3516	... 'bar/foo_other_ext.h', False)
				3517	_POSSIBLE_MY_HEADER
				3518	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				3519	_OTHER_HEADER
				3520	"""
				3521	# This is a list of all standard c++ header files, except
				3522	# those already checked for above.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3523	is_cpp_h = include in _CPP_HEADERS
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3524
				3525	if is_system:
				3526	if is_cpp_h:
				3527	return _CPP_SYS_HEADER
				3528	else:
				3529	return _C_SYS_HEADER
				3530
				3531	# If the target file and the include we're checking share a
				3532	# basename when we drop common extensions, and the include
				3533	# lives in . , then it's likely to be owned by the target file.
				3534	target_dir, target_base = (
				3535	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				3536	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				3537	if target_base == include_base and (
				3538	include_dir == target_dir or
				3539	include_dir == os.path.normpath(target_dir + '/../public')):
				3540	return _LIKELY_MY_HEADER
				3541
				3542	# If the target and include share some initial basename
				3543	# component, it's possible the target is implementing the
				3544	# include, so it's allowed to be first, but we'll never
				3545	# complain if it's not there.
				3546	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				3547	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				3548	if (target_first_component and include_first_component and
				3549	target_first_component.group(0) ==
				3550	include_first_component.group(0)):
				3551	return _POSSIBLE_MY_HEADER
				3552
				3553	return _OTHER_HEADER
				3554
				3555
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	3556
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      # Remember where this header was first included.
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))
      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
      if not include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical_include):
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)
      include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    # The alternation must use unescaped '|' so that headers such as
    # iostream, fstream, sstream and plain "stream" are all recognized.
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')
				3627
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3628
				3629	def _GetTextInside(text, start_pattern):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3630	r"""Retrieves all the text between matching open and close parentheses.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3631
				3632	Given a string of lines and a regular expression string, retrieve all the text
				3633	following the expression and between opening punctuation symbols like
				3634	(, [, or {, and the matching close-punctuation symbol. This properly nested
				3635	occurrences of the punctuations, so for the text like
				3636	printf(a(), b(c()));
				3637	a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
				3638	start_pattern must match string having an open punctuation symbol at the end.
				3639
				3640	Args:
				3641	text: The lines to extract text. Its comments and strings must be elided.
				3642	It can be single line and can span multiple lines.
				3643	start_pattern: The regexp string indicating where to start extracting
				3644	the text.
				3645	Returns:
				3646	The extracted text.
				3647	None if either the opening string or ending punctuation could not be found.
				3648	"""
				3649	# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
				3650	# rewritten to use _GetTextInside (and use inferior regexp matching today).
				3651
				3652	# Give opening punctuations to get the matching close-punctuations.
				3653	matching_punctuation = {'(': ')', '{': '}', '[': ']'}
				3654	closing_punctuation = set(matching_punctuation.itervalues())
				3655
				3656	# Find the position to start extracting text.
				3657	match = re.search(start_pattern, text, re.M)
				3658	if not match: # start_pattern not found in text.
				3659	return None
				3660	start_position = match.end(0)
				3661
				3662	assert start_position > 0, (
				3663	'start_pattern must ends with an opening punctuation.')
				3664	assert text[start_position - 1] in matching_punctuation, (
				3665	'start_pattern must ends with an opening punctuation.')
				3666	# Stack of closing punctuations we expect to have in text after position.
				3667	punctuation_stack = [matching_punctuation[text[start_position - 1]]]
				3668	position = start_position
				3669	while punctuation_stack and position < len(text):
				3670	if text[position] == punctuation_stack[-1]:
				3671	punctuation_stack.pop()
				3672	elif text[position] in closing_punctuation:
				3673	# A closing punctuation without matching opening punctuations.
				3674	return None
				3675	elif text[position] in matching_punctuation:
				3676	punctuation_stack.append(matching_punctuation[text[position]])
				3677	position += 1
				3678	if punctuation_stack:
				3679	# Opening punctuations left without matching close-punctuations.
				3680	return None
				3681	# punctuations match.
				3682	return text[start_position:position - 1]
				3683
				3684
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3685	# Patterns for matching call-by-reference parameters.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3686	#
				3687	# Supports nested templates up to 2 levels deep using this messy pattern:
				3688	# < (?: < (?: < [^<>]*
				3689	# >
				3690	# \| [^<>] )*
				3691	# >
				3692	# \| [^<>] )*
				3693	# >
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3694	_RE_PATTERN_IDENT = r'[_a-zA-Z]\w' # =~ [[:alpha:]][[:alnum:]]
				3695	_RE_PATTERN_TYPE = (
				3696	r'(?:const\s+)?(?:typename\s+\|class\s+\|struct\s+\|union\s+\|enum\s+)?'
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3697	r'(?:\w\|'
				3698	r'\s<(?:<(?:<[^<>]>\|[^<>])>\|[^<>])>\|'
				3699	r'::)+')
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3700	# A call-by-reference parameter ends with '& identifier'.
				3701	_RE_PATTERN_REF_PARAM = re.compile(
				3702	r'(' + _RE_PATTERN_TYPE + r'(?:\s(?:\bconst\b\|[]))\s'
				3703	r'&\s' + _RE_PATTERN_IDENT + r')\s(?:=[^,()]+)?[,)]')
				3704	# A call-by-const-reference parameter either ends with 'const& identifier'
				3705	# or looks like 'const type& identifier' when 'type' is atomic.
				3706	_RE_PATTERN_CONST_REF_PARAM = (
				3707	r'(?:.\s\bconst\s&\s' + _RE_PATTERN_IDENT +
				3708	r'\|const\s+' + _RE_PATTERN_TYPE + r'\s&\s' + _RE_PATTERN_IDENT + r')')
				3709
				3710
				3711	def CheckLanguage(filename, clean_lines, linenum, file_extension,
				3712	include_state, nesting_state, error):
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3713	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				3714
				3715	Some of these rules are hard to test (function overloading, using
				3716	uint32 inappropriately), but we do the best we can.
				3717
				3718	Args:
				3719	filename: The name of the current file.
				3720	clean_lines: A CleansedLines instance containing the file.
				3721	linenum: The number of the line to check.
				3722	file_extension: The extension (without the dot) of the filename.
				3723	include_state: An _IncludeState instance in which the headers are inserted.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3724	nesting_state: A _NestingState instance which maintains information about
				3725	the current stack of nested blocks being parsed.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3726	error: The function to call with any errors found.
				3727	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3728	# If the line is empty or consists of entirely a comment, no need to
				3729	# check it.
				3730	line = clean_lines.elided[linenum]
				3731	if not line:
				3732	return
				3733
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3734	match = _RE_PATTERN_INCLUDE.search(line)
				3735	if match:
				3736	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				3737	return
				3738
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3739	# Reset include state across preprocessor directives. This is meant
				3740	# to silence warnings for conditional includes.
				3741	if Match(r'^\s#\s(?:ifdef\|elif\|else\|endif)\b', line):
				3742	include_state.ResetSection()
				3743
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3744	# Make Windows paths like Unix.
				3745	fullname = os.path.abspath(filename).replace('\\', '/')
				3746
				3747	# TODO(unknown): figure out if they're using default arguments in fn proto.
				3748
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3749	# Check to see if they're using an conversion function cast.
				3750	# I just try to capture the most common basic types, though there are more.
				3751	# Parameterless conversion functions, such as bool(), are allowed as they are
				3752	# probably a member operator declaration or default constructor.
				3753	match = Search(
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3754	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3755	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)'
				3756	r'(\([^)].*)', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3757	if match:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3758	matched_new = match.group(1)
				3759	matched_type = match.group(2)
				3760	matched_funcptr = match.group(3)
				3761
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3762	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				3763	# where type may be float(), int(string), etc. Without context they are
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3764	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				3765	# MockCallback takes a template parameter of the form return_type(arg_type),
				3766	# which looks much like the cast we're trying to detect.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3767	#
				3768	# std::function<> wrapper has a similar problem.
				3769	#
				3770	# Return types for function pointers also look like casts if they
				3771	# don't have an extra space.
				3772	if (matched_new is None and # If new operator, then this isn't a cast
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3773	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3774	Search(r'\bMockCallback<.*>', line) or
				3775	Search(r'\bstd::function<.*>', line)) and
				3776	not (matched_funcptr and
				3777	Match(r'$(?:[^() ]+::\s\\s)?[^() ]+$\s\(',
				3778	matched_funcptr))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3779	# Try a bit harder to catch gmock lines: the only place where
				3780	# something looks like an old-style cast is where we declare the
				3781	# return type of the mocked method, and the only time when we
				3782	# are missing context is if MOCK_METHOD was split across
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3783	# multiple lines. The missing MOCK_METHOD is usually one or two
				3784	# lines back, so scan back one or two lines.
				3785	#
				3786	# It's not possible for gmock macros to appear in the first 2
				3787	# lines, since the class head + section name takes up 2 lines.
				3788	if (linenum < 2 or
				3789	not (Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s$',
				3790	clean_lines.elided[linenum - 1]) or
				3791	Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s$',
				3792	clean_lines.elided[linenum - 2]))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3793	error(filename, linenum, 'readability/casting', 4,
				3794	'Using deprecated casting style. '
				3795	'Use static_cast<%s>(...) instead' %
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3796	matched_type)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3797
				3798	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3799	'static_cast',
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3800	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$', error)
				3801
				3802	# This doesn't catch all cases. Consider (const char * const)"hello".
				3803	#
				3804	# (char *) "foo" should always be a const_cast (reinterpret_cast won't
				3805	# compile).
				3806	if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3807	'const_cast', r'$(char\s?\+\s?)$\s"', error):
				3808	pass
				3809	else:
				3810	# Check pointer casts for other than string constants
				3811	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3812	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3813
				3814	# In addition, we look for people taking the address of a cast. This
				3815	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				3816	# point where you think.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3817	match = Search(
				3818	r'(?:&$([^)]+)$[\w(])\|'
				3819	r'(?:&(static\|dynamic\|down\|reinterpret)_cast\b)', line)
				3820	if match and match.group(1) != '*':
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3821	error(filename, linenum, 'runtime/casting', 4,
				3822	('Are you taking an address of a cast? '
				3823	'This is dangerous: could be a temp var. '
				3824	'Take the address before doing the cast, rather than after'))
				3825
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3826	# Create an extended_line, which is the concatenation of the current and
				3827	# next lines, for more effective checking of code that may span more than one
				3828	# line.
				3829	if linenum + 1 < clean_lines.NumLines():
				3830	extended_line = line + clean_lines.elided[linenum + 1]
				3831	else:
				3832	extended_line = line
				3833
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3834	# Check for people declaring static/global STL strings at the top level.
				3835	# This is dangerous because the C++ language does not guarantee that
				3836	# globals with constructors are initialized before the first access.
				3837	match = Match(
				3838	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				3839	line)
				3840	# Make sure it's not a function.
				3841	# Function template specialization looks like: "string foo<Type>(...".
				3842	# Class template definitions look like: "string Foo<Type>::Method(...".
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3843	#
				3844	# Also ignore things that look like operators. These are matched separately
				3845	# because operator names cross non-word boundaries. If we change the pattern
				3846	# above, we would decrease the accuracy of matching identifiers.
				3847	if (match and
				3848	not Search(r'\boperator\W', line) and
				3849	not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)', match.group(3))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3850	error(filename, linenum, 'runtime/string', 4,
				3851	'For a static/global string constant, use a C style string instead: '
				3852	'"%schar %s[]".' %
				3853	(match.group(1), match.group(2)))
				3854
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3855	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				3856	error(filename, linenum, 'runtime/init', 4,
				3857	'You seem to be initializing a member variable with itself.')
				3858
				3859	if file_extension == 'h':
				3860	# TODO(unknown): check that 1-arg constructors are explicit.
				3861	# How to tell it's a constructor?
				3862	# (handled in CheckForNonStandardConstructs for now)
				3863	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				3864	# (level 1 error)
				3865	pass
				3866
				3867	# Check if people are using the verboten C basic types. The only exception
				3868	# we regularly allow is "unsigned short port" for port.
				3869	if Search(r'\bshort port\b', line):
				3870	if not Search(r'\bunsigned short port\b', line):
				3871	error(filename, linenum, 'runtime/int', 4,
				3872	'Use "unsigned short" for ports, not "short"')
				3873	else:
				3874	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				3875	if match:
				3876	error(filename, linenum, 'runtime/int', 4,
				3877	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				3878
				3879	# When snprintf is used, the second argument shouldn't be a literal.
				3880	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	3881	if match and match.group(2) != '0':
				3882	# If 2nd arg is zero, snprintf is used to calculate size.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3883	error(filename, linenum, 'runtime/printf', 3,
				3884	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				3885	'to snprintf.' % (match.group(1), match.group(2)))
				3886
				3887	# Check if some verboten C functions are being used.
				3888	if Search(r'\bsprintf\b', line):
				3889	error(filename, linenum, 'runtime/printf', 5,
				3890	'Never use sprintf. Use snprintf instead.')
				3891	match = Search(r'\b(strcpy\|strcat)\b', line)
				3892	if match:
				3893	error(filename, linenum, 'runtime/printf', 4,
				3894	'Almost always, snprintf is better than %s' % match.group(1))
				3895
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3896	# Check if some verboten operator overloading is going on
				3897	# TODO(unknown): catch out-of-line unary operator&:
				3898	# class X {};
				3899	# int operator&(const X& x) { return 42; } // unary operator&
				3900	# The trick is it's hard to tell apart from binary operator&:
				3901	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				3902	if Search(r'\boperator\s&\s$\s*$', line):
				3903	error(filename, linenum, 'runtime/operator', 4,
				3904	'Unary operator& is dangerous. Do not use it.')
				3905
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3906	# Check for suspicious usage of "if" like
				3907	# } if (a == b) {
				3908	if Search(r'\}\sif\s\(', line):
				3909	error(filename, linenum, 'readability/braces', 4,
				3910	'Did you mean "else if"? If not, start a new line for "if".')
				3911
				3912	# Check for potential format string bugs like printf(foo).
				3913	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				3914	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3915	# TODO(sugawarayu): Catch the following case. Need to change the calling
				3916	# convention of the whole function to process multiple line to handle it.
				3917	# printf(
				3918	# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
				3919	printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
				3920	if printf_args:
				3921	match = Match(r'([\w.\->()]+)$', printf_args)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3922	if match and match.group(1) != '__VA_ARGS__':
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3923	function_name = re.search(r'\b((?:string)?printf)\s*\(',
				3924	line, re.I).group(1)
				3925	error(filename, linenum, 'runtime/printf', 4,
				3926	'Potential format string bug. Do %s("%%s", %s) instead.'
				3927	% (function_name, match.group(1)))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3928
				3929	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				3930	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				3931	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				3932	error(filename, linenum, 'runtime/memset', 4,
				3933	'Did you mean "memset(%s, 0, %s)"?'
				3934	% (match.group(1), match.group(2)))
				3935
				3936	if Search(r'\busing namespace\b', line):
				3937	error(filename, linenum, 'build/namespaces', 5,
				3938	'Do not use namespace using-directives. '
				3939	'Use using-declarations instead.')
				3940
				3941	# Detect variable-length arrays.
				3942	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				3943	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				3944	match.group(3).find(']') == -1):
				3945	# Split the size using space and arithmetic operators as delimiters.
				3946	# If any of the resulting tokens are not compile time constants then
				3947	# report the error.
				3948	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				3949	is_const = True
				3950	skip_next = False
				3951	for tok in tokens:
				3952	if skip_next:
				3953	skip_next = False
				3954	continue
				3955
				3956	if Search(r'sizeof$.+$', tok): continue
				3957	if Search(r'arraysize$\w+$', tok): continue
				3958
				3959	tok = tok.lstrip('(')
				3960	tok = tok.rstrip(')')
				3961	if not tok: continue
				3962	if Match(r'\d+', tok): continue
				3963	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				3964	if Match(r'k[A-Z0-9]\w*', tok): continue
				3965	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				3966	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				3967	# A catch all for tricky sizeof cases, including 'sizeof expression',
				3968	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3969	# requires skipping the next token because we split on ' ' and '*'.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3970	if tok.startswith('sizeof'):
				3971	skip_next = True
				3972	continue
				3973	is_const = False
				3974	break
				3975	if not is_const:
				3976	error(filename, linenum, 'runtime/arrays', 1,
				3977	'Do not use variable-length arrays. Use an appropriately named '
				3978	"('k' followed by CamelCase) compile-time constant for the size.")
				3979
				3980	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				3981	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				3982	# in the class declaration.
				3983	match = Match(
				3984	(r'\s*'
				3985	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				3986	r'$.*$;$'),
				3987	line)
				3988	if match and linenum + 1 < clean_lines.NumLines():
				3989	next_line = clean_lines.elided[linenum + 1]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3990	# We allow some, but not all, declarations of variables to be present
				3991	# in the statement that defines the class. The [\w\,\s] fragment of
				3992	# the regular expression below allows users to declare instances of
				3993	# the class or pointers to instances, but not less common types such
				3994	# as function pointers or arrays. It's a tradeoff between allowing
				3995	# reasonable code and avoiding trying to parse more C++ using regexps.
				3996	if not Search(r'^\s}[\w\,\s]*;', next_line):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3997	error(filename, linenum, 'readability/constructors', 3,
				3998	match.group(1) + ' should be the last thing in the class')
				3999
				4000	# Check for use of unnamed namespaces in header files. Registration
				4001	# macros are typically OK, so we allow use of "namespace {" on lines
				4002	# that end with backslashes.
				4003	if (file_extension == 'h'
				4004	and Search(r'\bnamespace\s*{', line)
				4005	and line[-1] != '\\'):
				4006	error(filename, linenum, 'build/namespaces', 4,
				4007	'Do not use unnamed namespaces in header files. See '
				4008	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				4009	' for more information.')
				4010
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references in function parameters.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
        isinstance(nesting_state.stack[-1], _NamespaceInfo)):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    # The line ends with '{', so the innermost stack entry is the block this
    # line just opened; look at its parent to decide where we are.
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], _ClassInfo) or
        isinstance(nesting_state.stack[-2], _NamespaceInfo)):
      check_params = True  # just opened global/class/namespace block

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case
    # by looking at the two preceding lines.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4122
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4123
				4124	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				4125	error):
				4126	"""Checks for a C-style cast by looking for the pattern.
				4127
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4128	Args:
				4129	filename: The name of the current file.
				4130	linenum: The number of the line to check.
				4131	line: The line of code to check.
				4132	raw_line: The raw line of code to check, with comments.
				4133	cast_type: The string for the C++ cast to recommend. This is either
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4134	reinterpret_cast, static_cast, or const_cast, depending.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4135	pattern: The regular expression used to find C-style casts.
				4136	error: The function to call with any errors found.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4137
				4138	Returns:
				4139	True if an error was emitted.
				4140	False otherwise.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4141	"""
				4142	match = Search(pattern, line)
				4143	if not match:
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4144	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4145
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4146	# Exclude lines with sizeof, since sizeof looks like a cast.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4147	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				4148	if sizeof_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4149	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4150
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	4151	# operator++(int) and operator--(int)
				4152	if (line[0:match.start(1) - 1].endswith(' operator++') or
				4153	line[0:match.start(1) - 1].endswith(' operator--')):
				4154	return False
				4155
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4156	# A single unnamed argument for a function tends to look like old
				4157	# style cast. If we see those, don't issue warnings for deprecated
				4158	# casts, instead issue warnings for unnamed arguments where
				4159	# appropriate.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4160	#
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4161	# These are things that we want warnings for, since the style guide
				4162	# explicitly require all parameters to be named:
				4163	# Function(int);
				4164	# Function(int) {
				4165	# ConstMember(int) const;
				4166	# ConstMember(int) const {
				4167	# ExceptionMember(int) throw (...);
				4168	# ExceptionMember(int) throw (...) {
				4169	# PureVirtual(int) = 0;
				4170	#
				4171	# These are functions of some sort, where the compiler would be fine
				4172	# if they had named parameters, but people often omit those
				4173	# identifiers to reduce clutter:
				4174	# (FunctionPointer)(int);
				4175	# (FunctionPointer)(int) = value;
				4176	# Function((function_pointer_arg)(int))
				4177	# <TemplateArgument(int)>;
				4178	# <(FunctionPointerTemplateArgument)(int)>;
				4179	remainder = line[match.end(0):]
				4180	if Match(r'^\s*(?:;\|const\b\|throw\b\|=\|>\|\{\|\))', remainder):
				4181	# Looks like an unnamed parameter.
				4182
				4183	# Don't warn on any kind of template arguments.
				4184	if Match(r'^\s*>', remainder):
				4185	return False
				4186
				4187	# Don't warn on assignments to function pointers, but keep warnings for
				4188	# unnamed parameters to pure virtual functions. Note that this pattern
				4189	# will also pass on assignments of "0" to function pointers, but the
				4190	# preferred values for those would be "nullptr" or "NULL".
				4191	matched_zero = Match(r'^\s=\s(\S+)\s;', remainder)
				4192	if matched_zero and matched_zero.group(1) != '0':
				4193	return False
				4194
				4195	# Don't warn on function pointer declarations. For this we need
				4196	# to check what came before the "(type)" string.
				4197	if Match(r'.\)\s$', line[0:match.start(0)]):
				4198	return False
				4199
				4200	# Don't warn if the parameter is named with block comments, e.g.:
				4201	# Function(int /unused_param/);
				4202	if '/*' in raw_line:
				4203	return False
				4204
				4205	# Passed all filters, issue warning here.
				4206	error(filename, linenum, 'readability/function', 3,
				4207	'All parameters should be named in a function')
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4208	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4209
				4210	# At this point, all that should be left is actual casts.
				4211	error(filename, linenum, 'readability/casting', 4,
				4212	'Using C-style cast. Use %s<%s>(...) instead' %
				4213	(cast_type, match.group(1)))
				4214
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4215	return True
				4216
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4217
				4218	_HEADERS_CONTAINING_TEMPLATES = (
				4219	('<deque>', ('deque',)),
				4220	('<functional>', ('unary_function', 'binary_function',
				4221	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				4222	'negate',
				4223	'equal_to', 'not_equal_to', 'greater', 'less',
				4224	'greater_equal', 'less_equal',
				4225	'logical_and', 'logical_or', 'logical_not',
				4226	'unary_negate', 'not1', 'binary_negate', 'not2',
				4227	'bind1st', 'bind2nd',
				4228	'pointer_to_unary_function',
				4229	'pointer_to_binary_function',
				4230	'ptr_fun',
				4231	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				4232	'mem_fun_ref_t',
				4233	'const_mem_fun_t', 'const_mem_fun1_t',
				4234	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				4235	'mem_fun_ref',
				4236	)),
				4237	('<limits>', ('numeric_limits',)),
				4238	('<list>', ('list',)),
				4239	('<map>', ('map', 'multimap',)),
				4240	('<memory>', ('allocator',)),
				4241	('<queue>', ('queue', 'priority_queue',)),
				4242	('<set>', ('set', 'multiset',)),
				4243	('<stack>', ('stack',)),
				4244	('<string>', ('char_traits', 'basic_string',)),
				4245	('<utility>', ('pair',)),
				4246	('<vector>', ('vector',)),
				4247
				4248	# gcc extensions.
				4249	# Note: std::hash is their hash, ::hash is our hash
				4250	('<hash_map>', ('hash_map', 'hash_multimap',)),
				4251	('<hash_set>', ('hash_set', 'hash_multiset',)),
				4252	('<slist>', ('slist',)),
				4253	)
				4254
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4255	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				4256
				4257	_re_pattern_algorithm_header = []
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4258	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				4259	'transform'):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4260	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				4261	# type::max().
				4262	_re_pattern_algorithm_header.append(
				4263	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				4264	_template,
				4265	'<algorithm>'))
				4266
				4267	_re_pattern_templates = []
				4268	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				4269	for _template in _templates:
				4270	_re_pattern_templates.append(
				4271	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				4272	_template + '<>',
				4273	_header))
				4274
				4275
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a .h file are part of one 'module'.

  Both names are normalized before comparison:
    * the '.cc' / '.h' extension is removed;
    * a trailing '_unittest' or '_test' is removed from the .cc name;
    * a trailing '-inl' is removed from the .h name;
    * '/public/' and '/internal/' path components collapse to '/'.
  So foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc in the same
  directory form one module, and some/path/public/xyzzy matches
  some/path/internal/xyzzy.

  The files match when the normalized .cc path ends with the normalized .h
  path.  Whatever precedes that suffix in the .cc path (for example
  '/absolute/path/to/' for '/absolute/path/to/base/sysinfo.cc' versus
  'base/sysinfo.h') is returned so the caller can try to open the header;
  the real include paths are not known here, so this is guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h are reported as the same
  module, which gives occasional false positives.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not (filename_cc.endswith('.cc') and filename_h.endswith('.h')):
    return (False, '')

  # Drop the extension, then at most one test suffix.
  source_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break

  header_stem = filename_h[:-len('.h')]
  if header_stem.endswith('-inl'):
    header_stem = header_stem[:-len('-inl')]

  # public/ and internal/ directories are treated as interchangeable.
  for visibility_dir in ('/public/', '/internal/'):
    source_stem = source_stem.replace(visibility_dir, '/')
    header_stem = header_stem.replace(visibility_dir, '/')

  if not source_stem.endswith(header_stem):
    return False, ''
  return True, source_stem[:-len(header_stem)]
				4329
				4330
				4331	def UpdateIncludeState(filename, include_state, io=codecs):
				4332	"""Fill up the include_state with new includes found from the file.
				4333
				4334	Args:
				4335	filename: the name of the header to read.
				4336	include_state: an _IncludeState instance in which the headers are inserted.
				4337	io: The io factory to use to read the file. Provided for testability.
				4338
				4339	Returns:
				4340	True if a header was succesfully added. False otherwise.
				4341	"""
				4342	headerfile = None
				4343	try:
				4344	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				4345	except IOError:
				4346	return False
				4347	linenum = 0
				4348	for line in headerfile:
				4349	linenum += 1
				4350	clean_line = CleanseComments(line)
				4351	match = _RE_PATTERN_INCLUDE.search(clean_line)
				4352	if match:
				4353	include = match.group(2)
				4354	# The value formatting is cute, but not really used right now.
				4355	# What matters here is that the key is in include_state.
				4356	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				4357	return True
				4358
				4359
				4360	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				4361	io=codecs):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4362	"""Reports for missing stl includes.
				4363
				4364	This function will output warnings to make sure you are including the headers
				4365	necessary for the stl containers and functions that you use. We only give one
				4366	reason to include a header. For example, if you use both equal_to<> and
				4367	less<> in a .h file, only one (the latter in the file) of these will be
				4368	reported as a reason to include the <functional>.
				4369
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4370	Args:
				4371	filename: The name of the current file.
				4372	clean_lines: A CleansedLines instance containing the file.
				4373	include_state: An _IncludeState instance.
				4374	error: The function to call with any errors found.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4375	io: The IO factory to use to read the header file. Provided for unittest
				4376	injection.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4377	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4378	required = {} # A map of header name to linenumber and the template entity.
				4379	# Example of required: { '<functional>': (1219, 'less<>') }
				4380
				4381	for linenum in xrange(clean_lines.NumLines()):
				4382	line = clean_lines.elided[linenum]
				4383	if not line or line[0] == '#':
				4384	continue
				4385
				4386	# String is special -- it is a non-templatized type in STL.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4387	matched = _RE_PATTERN_STRING.search(line)
				4388	if matched:
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4389	# Don't warn about strings in non-STL namespaces:
				4390	# (We check only the first match per line; good enough.)
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4391	prefix = line[:matched.start()]
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4392	if prefix.endswith('std::') or not prefix.endswith('::'):
				4393	required['<string>'] = (linenum, 'string')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4394
				4395	for pattern, template, header in _re_pattern_algorithm_header:
				4396	if pattern.search(line):
				4397	required[header] = (linenum, template)
				4398
				4399	# The following function is just a speed up, no semantics are changed.
				4400	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				4401	continue
				4402
				4403	for pattern, template, header in _re_pattern_templates:
				4404	if pattern.search(line):
				4405	required[header] = (linenum, template)
				4406
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4407	# The policy is that if you #include something in foo.h you don't need to
				4408	# include it again in foo.cc. Here, we will look at possible includes.
				4409	# Let's copy the include_state so it is only messed up within this function.
				4410	include_state = include_state.copy()
				4411
				4412	# Did we find the header for this file (if any) and succesfully load it?
				4413	header_found = False
				4414
				4415	# Use the absolute path so that matching works properly.
erg@google.com	90ecb62	2012-01-30 19:34:23 +0000	[diff] [blame]	4416	abs_filename = FileInfo(filename).FullName()
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4417
				4418	# For Emacs's flymake.
				4419	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				4420	# by flymake and that file name might end with '_flymake.cc'. In that case,
				4421	# restore original file name here so that the corresponding header file can be
				4422	# found.
				4423	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				4424	# instead of 'foo_flymake.h'
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4425	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4426
				4427	# include_state is modified during iteration, so we iterate over a copy of
				4428	# the keys.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4429	header_keys = include_state.keys()
				4430	for header in header_keys:
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4431	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				4432	fullpath = common_path + header
				4433	if same_module and UpdateIncludeState(fullpath, include_state, io):
				4434	header_found = True
				4435
				4436	# If we can't find the header file for a .cc, assume it's because we don't
				4437	# know where to look. In that case we'll give up as we're not sure they
				4438	# didn't include it in the .h file.
				4439	# TODO(unknown): Do a better job of finding .h files so we are confident that
				4440	# not having the .h file means there isn't one.
				4441	if filename.endswith('.cc') and not header_found:
				4442	return
				4443
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4444	# All the lines have been processed, report the errors found.
				4445	for required_header_unstripped in required:
				4446	template = required[required_header_unstripped][1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4447	if required_header_unstripped.strip('<>"') not in include_state:
				4448	error(filename, required[required_header_unstripped][0],
				4449	'build/include_what_you_use', 4,
				4450	'Add #include ' + required_header_unstripped + ' for ' + template)
				4451
				4452
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
    """Flags make_pair calls that spell out their template arguments.

    G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
    specified explicitly, and such use isn't intended in any case.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    elided_text = clean_lines.elided[linenum]
    # Nothing to report unless 'make_pair' is directly followed by '<'.
    if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(elided_text) is None:
        return
    error(filename, linenum, 'build/explicit_make_pair',
          4,  # 4 = high confidence
          'For C++11-compatibility, omit template arguments from make_pair'
          ' OR use pair directly OR if appropriate, construct a pair directly')
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4475
				4476
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
    """Processes a single line in the file.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: The object holding the file's lines; its raw_lines
                   attribute is read here and the object itself is handed to
                   every check.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are
                     inserted.
      function_state: A _FunctionState instance which counts function lines,
                      etc.
      nesting_state: A _NestingState instance which maintains information
                     about the current stack of nested blocks being parsed.
      error: A callable to which errors are reported. The calls visible in
             this file pass it 5 arguments: filename, line number, category,
             confidence level, and message.
      extra_check_functions: An optional iterable of additional check
                             functions that will be run on each source line.
                             Each function takes 4 arguments: filename,
                             clean_lines, line, error. None (the default)
                             means no extra checks.
    """
    # A None default avoids sharing one mutable list object between calls.
    if extra_check_functions is None:
        extra_check_functions = ()

    raw_lines = clean_lines.raw_lines
    ParseNolintSuppressions(filename, raw_lines[line], line, error)
    nesting_state.Update(filename, clean_lines, line, error)
    # Stop here when the innermost block is flagged as inline assembly
    # (inline_asm is not _NO_ASM); none of the checks below run for it.
    if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
        return
    CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
    CheckStyle(filename, clean_lines, line, file_extension, nesting_state,
               error)
    CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                  nesting_state, error)
    CheckForNonConstReference(filename, clean_lines, line, nesting_state,
                              error)
    CheckForNonStandardConstructs(filename, clean_lines, line,
                                  nesting_state, error)
    CheckVlogArguments(filename, clean_lines, line, error)
    CheckPosixThreading(filename, clean_lines, line, error)
    CheckInvalidIncrement(filename, clean_lines, line, error)
    CheckMakePairUsesDeduction(filename, clean_lines, line, error)
    for check_fn in extra_check_functions:
        check_fn(filename, clean_lines, line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4517
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
    """Performs lint checks and reports any errors to the given error function.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with
             the last element being empty if the file is terminated with a
             newline.
      error: A callable to which errors are reported. The calls visible in
             this file pass it 5 arguments: filename, line number, category,
             confidence level, and message.
      extra_check_functions: An optional list of additional check functions
                             that will be run on each source line. Each
                             function takes 4 arguments: filename,
                             clean_lines, line, error. None (the default)
                             means no extra checks.
    """
    # A None default avoids sharing one mutable list object between calls.
    # A real list is created because the value is forwarded to ProcessLine,
    # which iterates over it.
    if extra_check_functions is None:
        extra_check_functions = []

    # Sentinel comment lines are added at both ends; this builds a new list
    # rather than modifying the caller's list.
    lines = (['// marker so line numbers and indices both start at 1'] +
             lines +
             ['// marker so line numbers end in a known way'])

    include_state = _IncludeState()
    function_state = _FunctionState()
    nesting_state = _NestingState()

    ResetNolintSuppressions()

    CheckForCopyright(filename, lines, error)

    if file_extension == 'h':
        CheckForHeaderGuard(filename, lines, error)

    RemoveMultiLineComments(filename, lines, error)
    clean_lines = CleansedLines(lines)
    for line in xrange(clean_lines.NumLines()):
        ProcessLine(filename, file_extension, clean_lines, line,
                    include_state, function_state, nesting_state, error,
                    extra_check_functions)
    nesting_state.CheckCompletedBlocks(filename, error)

    CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

    # We check here rather than inside ProcessLine so that we see raw
    # lines rather than "cleaned" lines.
    CheckForBadCharacters(filename, lines, error)

    CheckForNewlineAtEOF(filename, lines, error)
				4562
def ProcessFile(filename, vlevel, extra_check_functions=None):
    """Does google-lint on a single file.

    Progress and skip messages are written to sys.stderr. A file that cannot
    be read is skipped with a message instead of raising.

    Args:
      filename: The name of the file to parse. '-' means read from stdin.

      vlevel: The level of errors to report. Every error of confidence
              >= verbose_level will be reported. 0 is a good default.

      extra_check_functions: An optional list of additional check functions
                             that will be run on each source line. Each
                             function takes 4 arguments: filename,
                             clean_lines, line, error. None (the default)
                             means no extra checks.
    """
    # A None default avoids sharing one mutable list object between calls.
    # A real list is created because the value is forwarded to
    # ProcessFileData.
    if extra_check_functions is None:
        extra_check_functions = []

    _SetVerboseLevel(vlevel)

    try:
        # Support the UNIX convention of using "-" for stdin. Note that
        # we are not opening the file with universal newline support
        # (which codecs doesn't support anyway), so the resulting lines do
        # contain trailing '\r' characters if we are reading a file that
        # has CRLF endings.
        # If after the split a trailing '\r' is present, it is removed
        # below. If it is not expected to be present (i.e. os.linesep !=
        # '\r\n' as in Windows), a warning is issued below if this file
        # is processed.
        if filename == '-':
            contents = codecs.StreamReaderWriter(sys.stdin,
                                                 codecs.getreader('utf8'),
                                                 codecs.getwriter('utf8'),
                                                 'replace').read()
        else:
            # The with-block closes the file handle even if read() raises.
            with codecs.open(filename, 'r', 'utf8', 'replace') as source_file:
                contents = source_file.read()
        lines = contents.split('\n')

        carriage_return_found = False
        # Remove trailing '\r'.
        for index, text in enumerate(lines):
            if text.endswith('\r'):
                lines[index] = text.rstrip('\r')
                carriage_return_found = True

    except IOError:
        sys.stderr.write(
            "Skipping input '%s': Can't open for reading\n" % filename)
        return

    # Note, if no dot is found, this will give the entire filename as the ext.
    file_extension = filename[filename.rfind('.') + 1:]

    # When reading from stdin, the extension is unknown, so no cpplint tests
    # should rely on the extension.
    valid_extensions = ['cc', 'h', 'cpp', 'cu', 'cuh']
    if filename != '-' and file_extension not in valid_extensions:
        sys.stderr.write('Ignoring %s; not a valid file name '
                         '(.cc, .h, .cpp, .cu, .cuh)\n' % filename)
    else:
        ProcessFileData(filename, file_extension, lines, Error,
                        extra_check_functions)
        if carriage_return_found and os.linesep != '\r\n':
            # Use 0 for linenum since outputting only one error for
            # potentially several lines.
            Error(filename, 0, 'whitespace/newline', 1,
                  'One or more unexpected \\r (^M) found;'
                  'better to use only a \\n')

    sys.stderr.write('Done processing %s\n' % filename)
				4630
				4631
				4632	def PrintUsage(message):
				4633	"""Prints a brief usage string and exits, optionally with an error message.
				4634
				4635	Args:
				4636	message: The optional error message.
				4637	"""
				4638	sys.stderr.write(_USAGE)
				4639	if message:
				4640	sys.exit('\nFATAL ERROR: ' + message)
				4641	else:
				4642	sys.exit(1)
				4643
				4644
				4645	def PrintCategories():
				4646	"""Prints a list of all the error-categories used by error messages.
				4647
				4648	These are the categories used to filter messages via --filter.
				4649	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4650	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4651	sys.exit(0)
				4652
				4653
				4654	def ParseArguments(args):
				4655	"""Parses the command line arguments.
				4656
				4657	This may set the output format and verbosity level as side-effects.
				4658
				4659	Args:
				4660	args: The command line arguments:
				4661
				4662	Returns:
				4663	The list of filenames to lint.
				4664	"""
				4665	try:
				4666	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4667	'counting=',
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4668	'filter=',
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame]	4669	'root=',
				4670	'linelength='])
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4671	except getopt.GetoptError:
				4672	PrintUsage('Invalid arguments.')
				4673
				4674	verbosity = _VerboseLevel()
				4675	output_format = _OutputFormat()
				4676	filters = ''
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4677	counting_style = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4678
				4679	for (opt, val) in opts:
				4680	if opt == '--help':
				4681	PrintUsage(None)
				4682	elif opt == '--output':
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4683	if val not in ('emacs', 'vs7', 'eclipse'):
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	4684	PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4685	output_format = val
				4686	elif opt == '--verbose':
				4687	verbosity = int(val)
				4688	elif opt == '--filter':
				4689	filters = val
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4690	if not filters:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4691	PrintCategories()
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4692	elif opt == '--counting':
				4693	if val not in ('total', 'toplevel', 'detailed'):
				4694	PrintUsage('Valid counting options are total, toplevel, and detailed')
				4695	counting_style = val
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4696	elif opt == '--root':
				4697	global _root
				4698	_root = val
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame]	4699	elif opt == '--linelength':
				4700	global _line_length
				4701	try:
				4702	_line_length = int(val)
				4703	except ValueError:
				4704	PrintUsage('Line length must be digits.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4705
				4706	if not filenames:
				4707	PrintUsage('No files were specified.')
				4708
				4709	_SetOutputFormat(output_format)
				4710	_SetVerboseLevel(verbosity)
				4711	_SetFilters(filters)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4712	_SetCountingStyle(counting_style)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4713
				4714	return filenames
				4715
				4716
				4717	def main():
				4718	filenames = ParseArguments(sys.argv[1:])
				4719
				4720	# Change stderr to write with replacement characters so we don't die
				4721	# if we try to print something containing non-ASCII characters.
				4722	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				4723	codecs.getreader('utf8'),
				4724	codecs.getwriter('utf8'),
				4725	'replace')
				4726
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4727	_cpplint_state.ResetErrorCounts()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4728	for filename in filenames:
				4729	ProcessFile(filename, _cpplint_state.verbose_level)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4730	_cpplint_state.PrintErrorCounts()
				4731
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4732	sys.exit(_cpplint_state.error_count > 0)
				4733
				4734
				4735	if __name__ == '__main__':
				4736	main()