Blame - cpplint/cpplint.py - platform/external/google-styleguide

blob: 7ca38625effc0f63808ecdd67fdb9a52f2036f95 [file] [log] [blame]

erg@google.com	720121a	2012-05-11 16:31:47 +0000	[diff] [blame]	1	#!/usr/bin/python
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2	#
erg@google.com	8f91ab2	2011-09-06 21:04:45 +0000	[diff] [blame]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	8	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	18	#
erg@google.com	969161c	2009-06-26 22:06:46 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	30
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	31	"""Does google-lint on c++ files.
				32
				33	The goal of this script is to identify places in the code that may
				34	be in non-compliance with google style. It does not attempt to fix
				35	up these problems -- the point is to educate. It does also not
				36	attempt to find all problems, or to ensure that everything it does
				37	find is legitimately a problem.
				38
				39	In particular, we can get very confused by /* and // inside strings!
				40	We do a small hack, which is to ignore //'s with "'s after them on the
				41	same line, but it is far from perfect (in either direction).
				42	"""
				43
				44	import codecs
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	45	import copy
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	46	import getopt
				47	import math # for log
				48	import os
				49	import re
				50	import sre_compile
				51	import string
				52	import sys
				53	import unicodedata
				54
				55
				56	_USAGE = """
				57	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame^]	58	[--counting=total\|toplevel\|detailed] [--root=subdir]
				59	[--linelength=digits]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	60	<file> [file] ...
				61
				62	The style guidelines this tries to follow are those in
				63	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				64
				65	Every problem is given a confidence score from 1-5, with 5 meaning we are
				66	certain of the problem, and 1 meaning it could be a legitimate construct.
				67	This will miss some errors, and is not a substitute for a code review.
				68
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	69	To suppress false-positive errors of a certain category, add a
				70	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				71	suppresses errors of all categories on that line.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	72
				73	The files passed in will be linted; at least one file must be provided.
				74	Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
				75
				76	Flags:
				77
				78	output=vs7
				79	By default, the output is formatted to ease emacs parsing. Visual Studio
				80	compatible output (vs7) may also be used. Other formats are unsupported.
				81
				82	verbose=#
				83	Specify a number 0-5 to restrict errors to certain verbosity levels.
				84
				85	filter=-x,+y,...
				86	Specify a comma-separated list of category-filters to apply: only
				87	error messages whose category names pass the filters will be printed.
				88	(Category names are printed with the message and look like
				89	"[whitespace/indent]".) Filters are evaluated left to right.
				90	"-FOO" and "FOO" means "do not print categories that start with FOO".
				91	"+FOO" means "do print categories that start with FOO".
				92
				93	Examples: --filter=-whitespace,+whitespace/braces
				94	--filter=whitespace,runtime/printf,+runtime/printf_format
				95	--filter=-,+build/include_what_you_use
				96
				97	To see a list of all the categories used in cpplint, pass no arg:
				98	--filter=
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	99
				100	counting=total\|toplevel\|detailed
				101	The total number of errors found is always printed. If
				102	'toplevel' is provided, then the count of errors in each of
				103	the top-level categories like 'build' and 'whitespace' will
				104	also be printed. If 'detailed' is provided, then a count
				105	is provided for each category like 'build/class'.
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	106
				107	root=subdir
				108	The root directory used for deriving header guard CPP variable.
				109	By default, the header guard CPP variable is calculated as the relative
				110	path to the directory that contains .git, .hg, or .svn. When this flag
				111	is specified, the relative path is calculated from the specified
				112	directory. If the specified directory does not exist, this flag is
				113	ignored.
				114
				115	Examples:
				116	Assuing that src/.git exists, the header guard CPP variables for
				117	src/chrome/browser/ui/browser.h are:
				118
				119	No flag => CHROME_BROWSER_UI_BROWSER_H_
				120	--root=chrome => BROWSER_UI_BROWSER_H_
				121	--root=chrome/browser => UI_BROWSER_H_
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame^]	122
				123	linelength=digits
				124	This is the allowed line length for the project. The default value is
				125	80 characters.
				126
				127	Examples:
				128	--linelength=120
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	129	"""
				130
				131	# We categorize each error message we print. Here are the categories.
				132	# We want an explicit list so we can list them all in cpplint --filter=.
				133	# If you add a new error message with a new category, add it to the list
				134	# here! cpplint_unittest.py should tell you if you forget to do this.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	135	_ERROR_CATEGORIES = [
				136	'build/class',
				137	'build/deprecated',
				138	'build/endif_comment',
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	139	'build/explicit_make_pair',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	140	'build/forward_decl',
				141	'build/header_guard',
				142	'build/include',
				143	'build/include_alpha',
				144	'build/include_order',
				145	'build/include_what_you_use',
				146	'build/namespaces',
				147	'build/printf_format',
				148	'build/storage_class',
				149	'legal/copyright',
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	150	'readability/alt_tokens',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	151	'readability/braces',
				152	'readability/casting',
				153	'readability/check',
				154	'readability/constructors',
				155	'readability/fn_size',
				156	'readability/function',
				157	'readability/multiline_comment',
				158	'readability/multiline_string',
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	159	'readability/namespace',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	160	'readability/nolint',
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	161	'readability/nul',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	162	'readability/streams',
				163	'readability/todo',
				164	'readability/utf8',
				165	'runtime/arrays',
				166	'runtime/casting',
				167	'runtime/explicit',
				168	'runtime/int',
				169	'runtime/init',
				170	'runtime/invalid_increment',
				171	'runtime/member_string_references',
				172	'runtime/memset',
				173	'runtime/operator',
				174	'runtime/printf',
				175	'runtime/printf_format',
				176	'runtime/references',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	177	'runtime/string',
				178	'runtime/threadsafe_fn',
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	179	'runtime/vlog',
				180	'whitespace/blank_line',
				181	'whitespace/braces',
				182	'whitespace/comma',
				183	'whitespace/comments',
				184	'whitespace/empty_conditional_body',
				185	'whitespace/empty_loop_body',
				186	'whitespace/end_of_line',
				187	'whitespace/ending_newline',
				188	'whitespace/forcolon',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	189	'whitespace/indent',
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	190	'whitespace/line_length',
				191	'whitespace/newline',
				192	'whitespace/operators',
				193	'whitespace/parens',
				194	'whitespace/semicolon',
				195	'whitespace/tab',
				196	'whitespace/todo'
				197	]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	198
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags). All entries here should start with a '-' or '+', as in
# the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	204
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	205	# We used to check for high-bit characters, but after much discussion we
				206	# decided those were OK, as long as they were in UTF-8 and didn't represent
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	207	# hard-coded international strings, which belong in a separate i18n file.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	208
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	209
# Names of C++ headers, as an immutable set for fast membership tests.
# Each group below is a whitespace-separated list of header names.
_CPP_HEADERS = frozenset(
    # Legacy
    """
    algobase.h algo.h alloc.h builtinbuf.h bvector.h complex.h defalloc.h
    deque.h editbuf.h fstream.h function.h hash_map hash_map.h hash_set
    hash_set.h hashtable.h heap.h indstream.h iomanip.h iostream.h istream.h
    iterator.h list.h map.h multimap.h multiset.h ostream.h pair.h
    parsestream.h pfstream.h procbuf.h pthread_alloc pthread_alloc.h rope
    rope.h ropeimpl.h set.h slist slist.h stack.h stdiostream.h stl_alloc.h
    stl_relops.h streambuf.h stream.h strfile.h strstream.h tempbuf.h tree.h
    type_traits.h vector.h
    """.split() +
    # 17.6.1.2 C++ library headers
    """
    algorithm array atomic bitset chrono codecvt complex condition_variable
    deque exception forward_list fstream functional future initializer_list
    iomanip ios iosfwd iostream istream iterator limits list locale map
    memory mutex new numeric ostream queue random ratio regex set sstream
    stack stdexcept streambuf string strstream system_error thread tuple
    typeindex typeinfo type_traits unordered_map unordered_set utility
    valarray vector
    """.split() +
    # 17.6.1.2 C++ headers for C library facilities
    """
    cassert ccomplex cctype cerrno cfenv cfloat cinttypes ciso646 climits
    clocale cmath csetjmp csignal cstdalign cstdarg cstdbool cstddef cstdint
    cstdio cstdlib cstring ctgmath ctime cuchar cwchar cwctype
    """.split())
				345
# Assertion macros, defined in base/logging.h and testing/base/gunit.h.
# Every _M variant is listed ahead of its plain counterpart; substring
# matching relies on that order.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE: for each macro
# above, a dict from comparison operator to the specialised macro name.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# "Positive" macros: the operator maps straight onto its own suffix.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_' + replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_' + replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_' + replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_' + replacement + '_M'

# "Negative" macros: the operator maps onto the suffix of its logical inverse.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_' + inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_' + inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_' + inv_replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_' + inv_replacement + '_M'
				377
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
# The trailing lookahead requires a space, '(' or end of line after the
# keyword, so a shorter keyword cannot match as a prefix of a longer one
# (e.g. 'and' inside 'and_eq').
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
				404
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	405
				406	# These constants define types of headers for use with
				407	# _IncludeState.CheckNextIncludeOrder().
				408	_C_SYS_HEADER = 1
				409	_CPP_SYS_HEADER = 2
				410	_LIKELY_MY_HEADER = 3
				411	_POSSIBLE_MY_HEADER = 4
				412	_OTHER_HEADER = 5
				413
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	414	# These constants define the current inline assembly state
				415	_NO_ASM = 0 # Outside of inline assembly block
				416	_INSIDE_ASM = 1 # Inside inline assembly block
				417	_END_ASM = 2 # Last line of inline assembly block
				418	_BLOCK_ASM = 3 # The whole block is an inline assembly block
				419
				420	# Match start of assembly blocks
				421	_MATCH_ASM = re.compile(r'^\s*(?:asm\|_asm\|__asm\|__asm__)'
				422	r'(?:\s+(volatile\|__volatile__))?'
				423	r'\s*[{(]')
				424
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	425
				426	_regexp_compile_cache = {}
				427
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	428	# Finds occurrences of NOLINT or NOLINT(...).
				429	_RE_SUPPRESSION = re.compile(r'\bNOLINT\b($[^)]*$)?')
				430
				431	# {str, set(int)}: a map from error categories to sets of linenumbers
				432	# on which those errors are expected and should be suppressed.
				433	_error_suppressions = {}
				434
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	435	# The root directory used for deriving header guard CPP variable.
				436	# This is set by --root flag.
				437	_root = None
				438
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame^]	439	# The allowed line length of files.
				440	# This is set by --linelength flag.
				441	_line_length = 80
				442
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on one line, if any.

  Searches raw_line with _RE_SUPPRESSION and adds linenum to the global
  _error_suppressions store, either under the named category or under None
  (meaning "all categories").  A category that is not listed in
  _ERROR_CATEGORIES is reported through the error handler instead.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  spec = found.group(1)
  if spec is None or spec == '(*)':
    # Bare NOLINT or NOLINT(*) => "suppress all"
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  # Drop the surrounding parentheses to get the bare category name.
  spec = spec[1:-1]
  if spec not in _ERROR_CATEGORIES:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % spec)
    return
  _error_suppressions.setdefault(spec, set()).add(linenum)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	470
				471
				472	def ResetNolintSuppressions():
				473	"Resets the set of NOLINT suppressions to empty."
				474	_error_suppressions.clear()
				475
				476
				477	def IsErrorSuppressedByNolint(category, linenum):
				478	"""Returns true if the specified error category is suppressed on this line.
				479
				480	Consults the global error_suppressions map populated by
				481	ParseNolintSuppressions/ResetNolintSuppressions.
				482
				483	Args:
				484	category: str, the category of the error.
				485	linenum: int, the current line number.
				486	Returns:
				487	bool, True iff the error should be suppressed due to a NOLINT comment.
				488	"""
				489	return (linenum in _error_suppressions.get(category, set()) or
				490	linenum in _error_suppressions.get(None, set()))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	491
				492	def Match(pattern, s):
				493	"""Matches the string with the pattern, caching the compiled regexp."""
				494	# The regexp compilation caching is inlined in both Match and Search for
				495	# performance reasons; factoring it out into a separate function turns out
				496	# to be noticeably expensive.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	497	if pattern not in _regexp_compile_cache:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	498	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				499	return _regexp_compile_cache[pattern].match(s)
				500
				501
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # Compile through the public re API rather than the undocumented
    # sre_compile implementation module.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
				518
				519
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern string
    s: string to scan for the first location where the pattern matches

  Returns:
    The match object, or None if the pattern matches nowhere in s.
  """
  if pattern not in _regexp_compile_cache:
    # Compile through the public re API rather than the undocumented
    # sre_compile implementation module.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
				525
				526
				527	class _IncludeState(dict):
				528	"""Tracks line numbers for includes, and the order in which includes appear.
				529
				530	As a dict, an _IncludeState object serves as a mapping between include
				531	filename and line number on which that file was included.
				532
				533	Call CheckNextIncludeOrder() once for each header in the file, passing
				534	in the type constants defined above. Calls in an illegal order will
				535	raise an _IncludeError with an appropriate error message.
				536
				537	"""
				538	# self._section will move monotonically through this set. If it ever
				539	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				540	_INITIAL_SECTION = 0
				541	_MY_H_SECTION = 1
				542	_C_SECTION = 2
				543	_CPP_SECTION = 3
				544	_OTHER_H_SECTION = 4
				545
				546	_TYPE_NAMES = {
				547	_C_SYS_HEADER: 'C system header',
				548	_CPP_SYS_HEADER: 'C++ system header',
				549	_LIKELY_MY_HEADER: 'header this file implements',
				550	_POSSIBLE_MY_HEADER: 'header this file may implement',
				551	_OTHER_HEADER: 'other header',
				552	}
				553	_SECTION_NAMES = {
				554	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				555	_MY_H_SECTION: 'a header this file implements',
				556	_C_SECTION: 'C system header',
				557	_CPP_SECTION: 'C++ system header',
				558	_OTHER_H_SECTION: 'other header',
				559	}
				560
				561	def __init__(self):
				562	dict.__init__(self)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	563	self.ResetSection()
				564
				565	def ResetSection(self):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	566	# The name of the current section.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	567	self._section = self._INITIAL_SECTION
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	568	# The path of last found header.
				569	self._last_header = ''
				570
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	571	def SetLastHeader(self, header_path):
				572	self._last_header = header_path
				573
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	574	def CanonicalizeAlphabeticalOrder(self, header_path):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	575	"""Returns a path canonicalized for alphabetical comparison.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	576
				577	- replaces "-" with "_" so they both cmp the same.
				578	- removes '-inl' since we don't require them to be after the main header.
				579	- lowercase everything, just in case.
				580
				581	Args:
				582	header_path: Path to be canonicalized.
				583
				584	Returns:
				585	Canonicalized path.
				586	"""
				587	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				588
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	589	def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	590	"""Check if a header is in alphabetical order with the previous header.
				591
				592	Args:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	593	clean_lines: A CleansedLines instance containing the file.
				594	linenum: The number of the line to check.
				595	header_path: Canonicalized header to be checked.
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	596
				597	Returns:
				598	Returns true if the header is in alphabetical order.
				599	"""
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	600	# If previous section is different from current section, _last_header will
				601	# be reset to empty string, so it's always less than current header.
				602	#
				603	# If previous line was a blank line, assume that the headers are
				604	# intentionally sorted the way they are.
				605	if (self._last_header > header_path and
				606	not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	607	return False
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	608	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	609
				610	def CheckNextIncludeOrder(self, header_type):
				611	"""Returns a non-empty error message if the next header is out of order.
				612
				613	This function also updates the internal state to be ready to check
				614	the next include.
				615
				616	Args:
				617	header_type: One of the _XXX_HEADER constants defined above.
				618
				619	Returns:
				620	The empty string if the header is in the right order, or an
				621	error message describing what's wrong.
				622
				623	"""
				624	error_message = ('Found %s after %s' %
				625	(self._TYPE_NAMES[header_type],
				626	self._SECTION_NAMES[self._section]))
				627
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	628	last_section = self._section
				629
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	630	if header_type == _C_SYS_HEADER:
				631	if self._section <= self._C_SECTION:
				632	self._section = self._C_SECTION
				633	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	634	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	635	return error_message
				636	elif header_type == _CPP_SYS_HEADER:
				637	if self._section <= self._CPP_SECTION:
				638	self._section = self._CPP_SECTION
				639	else:
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	640	self._last_header = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	641	return error_message
				642	elif header_type == _LIKELY_MY_HEADER:
				643	if self._section <= self._MY_H_SECTION:
				644	self._section = self._MY_H_SECTION
				645	else:
				646	self._section = self._OTHER_H_SECTION
				647	elif header_type == _POSSIBLE_MY_HEADER:
				648	if self._section <= self._MY_H_SECTION:
				649	self._section = self._MY_H_SECTION
				650	else:
				651	# This will always be the fallback because we're not sure
				652	# enough that the header is associated with this file.
				653	self._section = self._OTHER_H_SECTION
				654	else:
				655	assert header_type == _OTHER_HEADER
				656	self._section = self._OTHER_H_SECTION
				657
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	658	if last_section != self._section:
				659	self._last_header = ''
				660
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	661	return ''
				662
				663
				664	class _CppLintState(object):
				665	"""Maintains module-wide state.."""
				666
				667	def __init__(self):
				668	self.verbose_level = 1 # global setting.
				669	self.error_count = 0 # global count of reported errors
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	670	# filters to apply when emitting error messages
				671	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	672	self.counting = 'total' # In what way are we counting errors?
				673	self.errors_by_category = {} # string to int dict storing error counts
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	674
				675	# output format:
				676	# "emacs" - format that emacs can parse (default)
				677	# "vs7" - format that Microsoft Visual Studio 7 can parse
				678	self.output_format = 'emacs'
				679
				680	def SetOutputFormat(self, output_format):
				681	"""Sets the output format for errors."""
				682	self.output_format = output_format
				683
				684	def SetVerboseLevel(self, level):
				685	"""Sets the module's verbosity, and returns the previous setting."""
				686	last_verbose_level = self.verbose_level
				687	self.verbose_level = level
				688	return last_verbose_level
				689
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	690	def SetCountingStyle(self, counting_style):
				691	"""Sets the module's counting options."""
				692	self.counting = counting_style
				693
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	694	def SetFilters(self, filters):
				695	"""Sets the error-message filters.
				696
				697	These filters are applied when deciding whether to emit a given
				698	error message.
				699
				700	Args:
				701	filters: A string of comma-separated filters (eg "+whitespace/indent").
				702	Each filter should start with + or -; else we die.
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	703
				704	Raises:
				705	ValueError: The comma-separated filters did not all start with '+' or '-'.
				706	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	707	"""
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	708	# Default filters always have less priority than the flag ones.
				709	self.filters = _DEFAULT_FILTERS[:]
				710	for filt in filters.split(','):
				711	clean_filt = filt.strip()
				712	if clean_filt:
				713	self.filters.append(clean_filt)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	714	for filt in self.filters:
				715	if not (filt.startswith('+') or filt.startswith('-')):
				716	raise ValueError('Every filter in --filters must start with + or -'
				717	' (%s does not)' % filt)
				718
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	719	def ResetErrorCounts(self):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	720	"""Sets the module's error statistic back to zero."""
				721	self.error_count = 0
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	722	self.errors_by_category = {}
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	723
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	724	def IncrementErrorCount(self, category):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	725	"""Bumps the module's error statistic."""
				726	self.error_count += 1
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	727	if self.counting in ('toplevel', 'detailed'):
				728	if self.counting != 'detailed':
				729	category = category.split('/')[0]
				730	if category not in self.errors_by_category:
				731	self.errors_by_category[category] = 0
				732	self.errors_by_category[category] += 1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	733
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	734	def PrintErrorCounts(self):
				735	"""Print a summary of errors by category, and the total."""
				736	for category, count in self.errors_by_category.iteritems():
				737	sys.stderr.write('Category \'%s\' errors found: %d\n' %
				738	(category, count))
				739	sys.stderr.write('Total errors found: %d\n' % self.error_count)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	740
# Single module-level _CppLintState instance; the accessor functions that
# follow read and update it.
_cpplint_state = _CppLintState()
				742
				743
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the module-level _cpplint_state.
  """
  return _cpplint_state.output_format
				747
				748
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format name, forwarded unchanged to
                   _cpplint_state.SetOutputFormat.
  """
  _cpplint_state.SetOutputFormat(output_format)
				752
				753
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the module-level _cpplint_state.
  """
  return _cpplint_state.verbose_level
				757
				758
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    Whatever _cpplint_state.SetVerboseLevel returns (the prior level).
  """
  return _cpplint_state.SetVerboseLevel(level)
				762
				763
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style, forwarded unchanged to
           _cpplint_state.SetCountingStyle.
  """
  _cpplint_state.SetCountingStyle(level)
				767
				768
def _Filters():
  """Returns the module's list of output filters, as a list.

  Note that this is the filters attribute of _cpplint_state itself, not a
  copy.
  """
  return _cpplint_state.filters
				772
				773
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
				785
				786
				787	class _FunctionState(object):
				788	"""Tracks current function name and the number of lines in its body."""
				789
				790	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				791	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				792
				793	def __init__(self):
				794	self.in_a_function = False
				795	self.lines_in_function = 0
				796	self.current_function = ''
				797
				798	def Begin(self, function_name):
				799	"""Start analyzing function body.
				800
				801	Args:
				802	function_name: The name of the function being tracked.
				803	"""
				804	self.in_a_function = True
				805	self.lines_in_function = 0
				806	self.current_function = function_name
				807
				808	def Count(self):
				809	"""Count line in current function body."""
				810	if self.in_a_function:
				811	self.lines_in_function += 1
				812
				813	def Check(self, error, filename, linenum):
				814	"""Report if too many lines in function body.
				815
				816	Args:
				817	error: The function to call with any errors found.
				818	filename: The name of the current file.
				819	linenum: The number of the line to check.
				820	"""
				821	if Match(r'T(EST\|est)', self.current_function):
				822	base_trigger = self._TEST_TRIGGER
				823	else:
				824	base_trigger = self._NORMAL_TRIGGER
				825	trigger = base_trigger * 2**_VerboseLevel()
				826
				827	if self.lines_in_function > trigger:
				828	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				829	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				830	if error_level > 5:
				831	error_level = 5
				832	error(filename, linenum, 'readability/fn_size', error_level,
				833	'Small and focused functions are preferred:'
				834	' %s has %d non-comment lines'
				835	' (error triggered by exceeding %d lines).' % (
				836	self.current_function, self.lines_in_function, trigger))
				837
				838	def End(self):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	839	"""Stop analyzing function body."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	840	self.in_a_function = False
				841
				842
				843	class _IncludeError(Exception):
				844	"""Indicates a problem with the include order in a file."""
				845	pass
				846
				847
				848	class FileInfo:
				849	"""Provides utility functions for filenames.
				850
				851	FileInfo provides easy access to the components of a file's path
				852	relative to the project root.
				853	"""
				854
				855	def __init__(self, filename):
				856	self._filename = filename
				857
				858	def FullName(self):
				859	"""Make Windows paths like Unix."""
				860	return os.path.abspath(self._filename).replace('\\', '/')
				861
				862	def RepositoryName(self):
				863	"""FullName after removing the local path to the repository.
				864
				865	If we have a real absolute path name here we can try to do something smart:
				866	detecting the root of the checkout and truncating /path/to/checkout from
				867	the name so that we get header guards that don't include things like
				868	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				869	people on different computers who have checked the source out to different
				870	locations won't see bogus errors.
				871	"""
				872	fullname = self.FullName()
				873
				874	if os.path.exists(fullname):
				875	project_dir = os.path.dirname(fullname)
				876
				877	if os.path.exists(os.path.join(project_dir, ".svn")):
				878	# If there's a .svn file in the current directory, we recursively look
				879	# up the directory tree for the top of the SVN checkout
				880	root_dir = project_dir
				881	one_up_dir = os.path.dirname(root_dir)
				882	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				883	root_dir = os.path.dirname(root_dir)
				884	one_up_dir = os.path.dirname(one_up_dir)
				885
				886	prefix = os.path.commonprefix([root_dir, project_dir])
				887	return fullname[len(prefix) + 1:]
				888
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	889	# Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
				890	# searching up from the current path.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	891	root_dir = os.path.dirname(fullname)
				892	while (root_dir != os.path.dirname(root_dir) and
erg@google.com	5e16969	2010-01-28 20:17:01 +0000	[diff] [blame]	893	not os.path.exists(os.path.join(root_dir, ".git")) and
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	894	not os.path.exists(os.path.join(root_dir, ".hg")) and
				895	not os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	896	root_dir = os.path.dirname(root_dir)
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	897
				898	if (os.path.exists(os.path.join(root_dir, ".git")) or
erg@google.com	3dc7426	2011-11-30 01:12:00 +0000	[diff] [blame]	899	os.path.exists(os.path.join(root_dir, ".hg")) or
				900	os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	42e59b0	2010-10-04 22:18:07 +0000	[diff] [blame]	901	prefix = os.path.commonprefix([root_dir, project_dir])
				902	return fullname[len(prefix) + 1:]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	903
				904	# Don't know what to do; header guard warnings may be wrong...
				905	return fullname
				906
				907	def Split(self):
				908	"""Splits the file into the directory, basename, and extension.
				909
				910	For 'chrome/browser/browser.cc', Split() would
				911	return ('chrome/browser', 'browser', '.cc')
				912
				913	Returns:
				914	A tuple of (directory, basename, extension).
				915	"""
				916
				917	googlename = self.RepositoryName()
				918	project, rest = os.path.split(googlename)
				919	return (project,) + os.path.splitext(rest)
				920
				921	def BaseName(self):
				922	"""File base name - text after the final slash, before the final period."""
				923	return self.Split()[1]
				924
				925	def Extension(self):
				926	"""File extension - text following the final period."""
				927	return self.Split()[2]
				928
				929	def NoExtension(self):
				930	"""File has no source file extension."""
				931	return '/'.join(self.Split()[0:2])
				932
				933	def IsSource(self):
				934	"""File has a source file extension."""
				935	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				936
				937
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be reported.

  An error is dropped when a NOLINT comment suppresses it, when its
  confidence is below the current verbosity level, or when the last filter
  whose prefix matches the category is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The line number the error refers to.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if (IsErrorSuppressedByNolint(category, linenum) or
      confidence < _cpplint_state.verbose_level):
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  suppressed = False
  for rule in _Filters():
    if rule.startswith('-'):
      verdict = True
    elif rule.startswith('+'):
      verdict = False
    else:
      assert False  # should have been checked for in SetFilter.
      continue  # only reachable when assertions are disabled
    if category.startswith(rule[1:]):
      suppressed = verdict

  return not suppressed
				963
				964
				965	def Error(filename, linenum, category, confidence, message):
				966	"""Logs the fact we've found a lint error.
				967
				968	We log where the error was found, and also our confidence in the error,
				969	that is, how certain we are this is a legitimate style regression, and
				970	not a misidentification or a use that's sometimes justified.
				971
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	972	False positives can be suppressed by the use of
				973	"cpplint(category)" comments on the offending line. These are
				974	parsed into _error_suppressions.
				975
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	976	Args:
				977	filename: The name of the file containing the error.
				978	linenum: The number of the line containing the error.
				979	category: A string used to describe the "category" this bug
				980	falls under: "whitespace", say, or "runtime". Categories
				981	may have a hierarchy separated by slashes: "whitespace/indent".
				982	confidence: A number from 1-5 representing a confidence score for
				983	the error, with 5 meaning that we are certain of the problem,
				984	and 1 meaning that it could be a legitimate construct.
				985	message: The error message.
				986	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	987	if _ShouldPrintError(category, confidence, linenum):
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	988	_cpplint_state.IncrementErrorCount(category)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	989	if _cpplint_state.output_format == 'vs7':
				990	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				991	filename, linenum, message, category, confidence))
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	992	elif _cpplint_state.output_format == 'eclipse':
				993	sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
				994	filename, linenum, message, category, confidence))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	995	else:
				996	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				997	filename, linenum, message, category, confidence))
				998
				999
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1000	# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1001	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				1002	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				1003	# Matches strings. Escape codes should already be removed by ESCAPES.
				1004	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				1005	# Matches characters. Escape codes should already be removed by ESCAPES.
				1006	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				1007	# Matches multi-line C++ comments.
				1008	# This RE is a little bit more complicated than one might expect, because we
				1009	# have to take care of space removals tools so we can handle comments inside
				1010	# statements better.
				1011	# The current rule is: We only clear spaces from both sides when we're at the
				1012	# end of the line. Otherwise, we try to remove spaces from the right side,
				1013	# if this doesn't work we try on left side but only if there's a non-character
				1014	# on the right.
				1015	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				1016	r"""(\s/\.\/\s*$\|
				1017	/\.\*/\s+\|
				1018	\s+/\.\*/(?=\W)\|
				1019	/\.\*/)""", re.VERBOSE)
				1020
				1021
				1022	def IsCppString(line):
				1023	"""Does line terminate so, that the next symbol is in string constant.
				1024
				1025	This function does not consider single-line nor multi-line comments.
				1026
				1027	Args:
				1028	line: is a partial line of code starting from the 0..n.
				1029
				1030	Returns:
				1031	True, if next character appended to 'line' is inside a
				1032	string constant.
				1033	"""
				1034
				1035	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				1036	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				1037
				1038
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """

  # While inside a raw string this holds its terminator, i.e. ')' followed by
  # the user-chosen delimiter and a closing quote; None otherwise.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, append a blank line.
        line = ''

    else:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      # Groups: (1) text before the literal, (2) the possibly empty
      # delimiter, (3) the rest of the line after the opening parenthesis.
      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if matched:
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
				1098
				1099
def FindNextMultiLineCommentStart(lines, lineix):
  """Find the beginning marker for a multiline comment.

  Args:
    lines: The list of source lines.
    lineix: The index at which to start searching.

  Returns:
    The index of the first line at or after lineix that starts with '/*'
    (ignoring surrounding whitespace) and does not close the comment on the
    same line, or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Skip comments that are opened and closed on a single line.
    if stripped.startswith('/*') and stripped.find('*/', 2) == -1:
      return index
  return len(lines)
				1109
				1110
				1111	def FindNextMultiLineCommentEnd(lines, lineix):
				1112	"""We are inside a comment, find the end marker."""
				1113	while lineix < len(lines):
				1114	if lines[lineix].strip().endswith('*/'):
				1115	return lineix
				1116	lineix += 1
				1117	return len(lines)
				1118
				1119
				1120	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				1121	"""Clears a range of lines for multi-line comments."""
				1122	# Having // dummy comments makes the lines non-empty, so we will not get
				1123	# unnecessary blank line warnings later in the code.
				1124	for i in range(begin, end):
				1125	lines[i] = '// dummy'
				1126
				1127
				1128	def RemoveMultiLineComments(filename, lines, error):
				1129	"""Removes multiline (c-style) comments from lines."""
				1130	lineix = 0
				1131	while lineix < len(lines):
				1132	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				1133	if lineix_begin >= len(lines):
				1134	return
				1135	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				1136	if lineix_end >= len(lines):
				1137	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				1138	'Could not find end of multi-line comment')
				1139	return
				1140	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				1141	lineix = lineix_end + 1
				1142
				1143
				1144	def CleanseComments(line):
				1145	"""Removes //-comments and single-line C-style /* */ comments.
				1146
				1147	Args:
				1148	line: A line of C++ source.
				1149
				1150	Returns:
				1151	The line with single-line comments removed.
				1152	"""
				1153	commentpos = line.find('//')
				1154	if commentpos != -1 and not IsCppString(line[:commentpos]):
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	1155	line = line[:commentpos].rstrip()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1156	# get rid of /* ... */
				1157	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				1158
				1159
class CleansedLines(object):
  """Holds several copies of all lines with different preprocessing applied.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member contains the lines as returned by
     CleanseRawStrings.
  All these members are lists; elided and lines have one entry per entry of
  lines_without_raw_strings.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      collapsed = self._CollapseStrings(text)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings; lines matching _RE_PATTERN_INCLUDE
      are returned unchanged.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
				1205
				1206
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than xrange() so this also runs on Python 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char == startchar:
      depth += 1
    elif char == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1229
				1230
def CloseExpression(clean_lines, linenum, pos):
  """Finds where the bracket at the given position is closed.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[<':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}', '<': '>'}[startchar]

  # Scan the first line from pos, then each following line from its start,
  # carrying the nesting depth across lines.
  end_pos, num_open = FindEndOfExpressionInLine(
      line, pos, 0, startchar, endchar)
  while end_pos <= -1 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    end_pos, num_open = FindEndOfExpressionInLine(
        line, 0, num_open, startchar, endchar)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1275
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1276
				1277	def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
				1278	"""Find position at the matching startchar.
				1279
				1280	This is almost the reverse of FindEndOfExpressionInLine, but note
				1281	that the input position and returned position differs by 1.
				1282
				1283	Args:
				1284	line: a CleansedLines line.
				1285	endpos: start searching at this position.
				1286	depth: nesting level at endpos.
				1287	startchar: expression opening character.
				1288	endchar: expression closing character.
				1289
				1290	Returns:
				1291	On finding matching startchar: (index at matching startchar, 0)
				1292	Otherwise: (-1, new depth at beginning of this line)
				1293	"""
				1294	for i in xrange(endpos, -1, -1):
				1295	if line[i] == endchar:
				1296	depth += 1
				1297	elif line[i] == startchar:
				1298	depth -= 1
				1299	if depth == 0:
				1300	return (i, 0)
				1301	return (-1, depth)
				1302
				1303
				1304	def ReverseCloseExpression(clean_lines, linenum, pos):
				1305	"""If input points to ) or } or ] or >, finds the position that opens it.
				1306
				1307	If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
				1308	linenum/pos that correspond to the opening of the expression.
				1309
				1310	Args:
				1311	clean_lines: A CleansedLines instance containing the file.
				1312	linenum: The number of the line to check.
				1313	pos: A position on the line.
				1314
				1315	Returns:
				1316	A tuple (line, linenum, pos) pointer at the opening brace, or
				1317	(line, 0, -1) if we never find the matching opening brace. Note
				1318	we ignore strings and comments when matching; and the line we
				1319	return is the 'cleansed' line at linenum.
				1320	"""
				1321	line = clean_lines.elided[linenum]
				1322	endchar = line[pos]
				1323	if endchar not in ')}]>':
				1324	return (line, 0, -1)
				1325	if endchar == ')': startchar = '('
				1326	if endchar == ']': startchar = '['
				1327	if endchar == '}': startchar = '{'
				1328	if endchar == '>': startchar = '<'
				1329
				1330	# Check last line
				1331	(start_pos, num_open) = FindStartOfExpressionInLine(
				1332	line, pos, 0, startchar, endchar)
				1333	if start_pos > -1:
				1334	return (line, linenum, start_pos)
				1335
				1336	# Continue scanning backward
				1337	while linenum > 0:
				1338	linenum -= 1
				1339	line = clean_lines.elided[linenum]
				1340	(start_pos, num_open) = FindStartOfExpressionInLine(
				1341	line, len(line) - 1, num_open, startchar, endchar)
				1342	if start_pos > -1:
				1343	return (line, linenum, start_pos)
				1344
				1345	# Did not find startchar before beginning of file, give up
				1346	return (line, 0, -1)
				1347
				1348
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file;
           index 0 is a dummy line and is not inspected.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  candidates = lines[1:11]
  if not any(re.search(r'Copyright', text, re.I) for text in candidates):
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found. '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
				1360
				1361
				1362	def GetHeaderGuardCPPVariable(filename):
				1363	"""Returns the CPP variable that should be used as a header guard.
				1364
				1365	Args:
				1366	filename: The name of a C++ header file.
				1367
				1368	Returns:
				1369	The CPP variable that should be used as a header guard in the
				1370	named file.
				1371
				1372	"""
				1373
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1374	# Restores original filename in case that cpplint is invoked from Emacs's
				1375	# flymake.
				1376	filename = re.sub(r'_flymake\.h$', '.h', filename)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1377	filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1378
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1379	fileinfo = FileInfo(filename)
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	1380	file_path_from_root = fileinfo.RepositoryName()
				1381	if _root:
				1382	file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
				1383	return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1384
				1385
				1386	def CheckForHeaderGuard(filename, lines, error):
				1387	"""Checks that the file contains a header guard.
				1388
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	1389	Logs an error if no #ifndef header guard is present. For other
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1390	headers, checks that the full pathname is used.
				1391
				1392	Args:
				1393	filename: The name of the C++ header file.
				1394	lines: An array of strings, each representing a line of the file.
				1395	error: The function to call with any errors found.
				1396	"""
				1397
				1398	cppvar = GetHeaderGuardCPPVariable(filename)
				1399
				1400	ifndef = None
				1401	ifndef_linenum = 0
				1402	define = None
				1403	endif = None
				1404	endif_linenum = 0
				1405	for linenum, line in enumerate(lines):
				1406	linesplit = line.split()
				1407	if len(linesplit) >= 2:
				1408	# find the first occurrence of #ifndef and #define, save arg
				1409	if not ifndef and linesplit[0] == '#ifndef':
				1410	# set ifndef to the header guard presented on the #ifndef line.
				1411	ifndef = linesplit[1]
				1412	ifndef_linenum = linenum
				1413	if not define and linesplit[0] == '#define':
				1414	define = linesplit[1]
				1415	# find the last occurrence of #endif, save entire line
				1416	if line.startswith('#endif'):
				1417	endif = line
				1418	endif_linenum = linenum
				1419
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1420	if not ifndef:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1421	error(filename, 0, 'build/header_guard', 5,
				1422	'No #ifndef header guard found, suggested CPP variable is: %s' %
				1423	cppvar)
				1424	return
				1425
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1426	if not define:
				1427	error(filename, 0, 'build/header_guard', 5,
				1428	'No #define header guard found, suggested CPP variable is: %s' %
				1429	cppvar)
				1430	return
				1431
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1432	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				1433	# for backward compatibility.
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1434	if ifndef != cppvar:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1435	error_level = 0
				1436	if ifndef != cppvar + '_':
				1437	error_level = 5
				1438
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1439	ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
				1440	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1441	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				1442	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				1443
erg@google.com	dc28970	2012-01-26 20:30:03 +0000	[diff] [blame]	1444	if define != ifndef:
				1445	error(filename, 0, 'build/header_guard', 5,
				1446	'#ifndef and #define don\'t match, suggested CPP variable is: %s' %
				1447	cppvar)
				1448	return
				1449
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1450	if endif != ('#endif // %s' % cppvar):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1451	error_level = 0
				1452	if endif != ('#endif // %s' % (cppvar + '_')):
				1453	error_level = 5
				1454
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	1455	ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
				1456	error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1457	error(filename, endif_linenum, 'build/header_guard', error_level,
				1458	'#endif line should be "#endif // %s"' % cppvar)
				1459
				1460
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a character we consider bad.

  Two characters are looked for, in this order on each line:

  1. The Unicode replacement character. Its presence means the file held
     invalid UTF-8 (likely) or a literal replacement character (which it
     shouldn't). Line numbering may be thrown off if the invalid UTF-8 sat
     next to a newline.

  2. The NUL byte, which is problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (character to look for, error category, message), in reporting order.
  bad_character_checks = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for index, text in enumerate(lines):
    for needle, category, message in bad_character_checks:
      if needle in text:
        error(filename, index, category, 5, message)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1484
				1485
				1486	def CheckForNewlineAtEOF(filename, lines, error):
				1487	"""Logs an error if there is no newline char at the end of the file.
				1488
				1489	Args:
				1490	filename: The name of the current file.
				1491	lines: An array of strings, each representing a line of the file.
				1492	error: The function to call with any errors found.
				1493	"""
				1494
				1495	# The array lines() was created by adding two newlines to the
				1496	# original file (go figure), then splitting on \n.
				1497	# To verify that the file ends in \n, we just have to make sure the
				1498	# last-but-two element of lines() exists and is empty.
				1499	if len(lines) < 3 or lines[-2]:
				1500	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1501	'Could not find a newline character at the end of the file.')
				1502
				1503
				1504	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1505	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1506
				1507	/* ... */ comments are legit inside macros, for one line.
				1508	Otherwise, we prefer // comments, so it's ok to warn about the
				1509	other. Likewise, it's ok for strings to extend across multiple
				1510	lines, as long as a line continuation character (backslash)
				1511	terminates each line. Although not currently prohibited by the C++
				1512	style guide, it's ugly and unnecessary. We don't do well with either
				1513	in this lint program, so we warn about both.
				1514
				1515	Args:
				1516	filename: The name of the current file.
				1517	clean_lines: A CleansedLines instance containing the file.
				1518	linenum: The number of the line to check.
				1519	error: The function to call with any errors found.
				1520	"""
				1521	line = clean_lines.elided[linenum]
				1522
				1523	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1524	# second (escaped) slash may trigger later \" detection erroneously.
				1525	line = line.replace('\\\\', '')
				1526
				1527	if line.count('/') > line.count('/'):
				1528	error(filename, linenum, 'readability/multiline_comment', 5,
				1529	'Complex multi-line /.../-style comment found. '
				1530	'Lint may give bogus warnings. '
				1531	'Consider replacing these with //-style comments, '
				1532	'with #if 0...#endif, '
				1533	'or with more clearly structured multi-line comments.')
				1534
				1535	if (line.count('"') - line.count('\\"')) % 2:
				1536	error(filename, linenum, 'readability/multiline_string', 5,
				1537	'Multi-line string ("...") found. This lint script doesn\'t '
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1538	'do well with such strings, and may give bogus warnings. '
				1539	'Use C++11 raw strings or concatenation instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1540
				1541
				1542	threading_list = (
				1543	('asctime(', 'asctime_r('),
				1544	('ctime(', 'ctime_r('),
				1545	('getgrgid(', 'getgrgid_r('),
				1546	('getgrnam(', 'getgrnam_r('),
				1547	('getlogin(', 'getlogin_r('),
				1548	('getpwnam(', 'getpwnam_r('),
				1549	('getpwuid(', 'getpwuid_r('),
				1550	('gmtime(', 'gmtime_r('),
				1551	('localtime(', 'localtime_r('),
				1552	('rand(', 'rand_r('),
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1553	('strtok(', 'strtok_r('),
				1554	('ttyname(', 'ttyname_r('),
				1555	)
				1556
				1557
				1558	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1559	"""Checks for calls to thread-unsafe functions.
				1560
				1561	Much code has been originally written without consideration of
				1562	multi-threading. Also, engineers are relying on their old experience;
				1563	they have learned posix before threading extensions were added. These
				1564	tests guide the engineers to use thread-safe functions (when using
				1565	posix directly).
				1566
				1567	Args:
				1568	filename: The name of the current file.
				1569	clean_lines: A CleansedLines instance containing the file.
				1570	linenum: The number of the line to check.
				1571	error: The function to call with any errors found.
				1572	"""
				1573	line = clean_lines.elided[linenum]
				1574	for single_thread_function, multithread_safe_function in threading_list:
				1575	ix = line.find(single_thread_function)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	1576	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1577	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1578	line[ix - 1] not in ('_', '.', '>'))):
				1579	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1580	'Consider using ' + multithread_safe_function +
				1581	'...) instead of ' + single_thread_function +
				1582	'...) for improved thread safety.')
				1583
				1584
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # The parentheses of the call are escaped so they match literally; the
  # inner group lists the symbolic severities that VLOG must not be given.
  if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
    error(filename, linenum, 'runtime/vlog', 5,
          'VLOG() should be used with numeric verbosity level. '
          'Use LOG() if you want symbolic severity levels.')
				1602
				1603
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  The same applies to the decrement form *count--. Only statements that
  start the line (after optional whitespace) are detected.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(line):
    error(filename, linenum, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
				1630
				1631
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1632	class _BlockInfo(object):
				1633	"""Stores information about a generic block of code."""
				1634
				1635	def __init__(self, seen_open_brace):
				1636	self.seen_open_brace = seen_open_brace
				1637	self.open_parentheses = 0
				1638	self.inline_asm = _NO_ASM
				1639
				1640	def CheckBegin(self, filename, clean_lines, linenum, error):
				1641	"""Run checks that applies to text up to the opening brace.
				1642
				1643	This is mostly for checking the text after the class identifier
				1644	and the "{", usually where the base class is specified. For other
				1645	blocks, there isn't much to check, so we always pass.
				1646
				1647	Args:
				1648	filename: The name of the current file.
				1649	clean_lines: A CleansedLines instance containing the file.
				1650	linenum: The number of the line to check.
				1651	error: The function to call with any errors found.
				1652	"""
				1653	pass
				1654
				1655	def CheckEnd(self, filename, clean_lines, linenum, error):
				1656	"""Run checks that applies to text after the closing brace.
				1657
				1658	This is mostly used for checking end of namespace comments.
				1659
				1660	Args:
				1661	filename: The name of the current file.
				1662	clean_lines: A CleansedLines instance containing the file.
				1663	linenum: The number of the line to check.
				1664	error: The function to call with any errors found.
				1665	"""
				1666	pass
				1667
				1668
				1669	class _ClassInfo(_BlockInfo):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1670	"""Stores information about a class."""
				1671
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1672	def __init__(self, name, class_or_struct, clean_lines, linenum):
				1673	_BlockInfo.__init__(self, False)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1674	self.name = name
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1675	self.starting_linenum = linenum
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1676	self.is_derived = False
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1677	if class_or_struct == 'struct':
				1678	self.access = 'public'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1679	self.is_struct = True
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1680	else:
				1681	self.access = 'private'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1682	self.is_struct = False
				1683
				1684	# Remember initial indentation level for this class. Using raw_lines here
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1685	# instead of elided to account for leading comments.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1686	initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
				1687	if initial_indent:
				1688	self.class_indent = len(initial_indent.group(1))
				1689	else:
				1690	self.class_indent = 0
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1691
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1692	# Try to find the end of the class. This will be confused by things like:
				1693	# class A {
				1694	# } *x = { ...
				1695	#
				1696	# But it's still good enough for CheckSectionSpacing.
				1697	self.last_line = 0
				1698	depth = 0
				1699	for i in range(linenum, clean_lines.NumLines()):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1700	line = clean_lines.elided[i]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	1701	depth += line.count('{') - line.count('}')
				1702	if not depth:
				1703	self.last_line = i
				1704	break
				1705
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1706	def CheckBegin(self, filename, clean_lines, linenum, error):
				1707	# Look for a bare ':'
				1708	if Search('(^\|[^:]):($\|[^:])', clean_lines.elided[linenum]):
				1709	self.is_derived = True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1710
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1711	def CheckEnd(self, filename, clean_lines, linenum, error):
				1712	# Check that closing brace is aligned with beginning of the class.
				1713	# Only do this if the closing brace is indented by only whitespaces.
				1714	# This means we will not check single-line class definitions.
				1715	indent = Match(r'^( *)\}', clean_lines.elided[linenum])
				1716	if indent and len(indent.group(1)) != self.class_indent:
				1717	if self.is_struct:
				1718	parent = 'struct ' + self.name
				1719	else:
				1720	parent = 'class ' + self.name
				1721	error(filename, linenum, 'whitespace/indent', 3,
				1722	'Closing brace should be aligned with beginning of %s' % parent)
				1723
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1724
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records the namespace name and the line it starts on.

    Args:
      name: The namespace name; a false value (anonymous namespace) is
        stored as ''.
      linenum: The number of the line the namespace starts on.
    """
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace. Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines. However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations). There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    if (linenum - self.starting_linenum < 10
        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                    r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
				1777
				1778
				1779	class _PreprocessorInfo(object):
				1780	"""Stores checkpoints of nesting stacks when #if/#else is seen."""
				1781
				1782	def __init__(self, stack_before_if):
				1783	# The entire nesting stack before #if
				1784	self.stack_before_if = stack_before_if
				1785
				1786	# The entire nesting stack up to #else
				1787	self.stack_before_else = []
				1788
				1789	# Whether we have already seen #else or #elif
				1790	self.seen_else = False
				1791
				1792
				1793	class _NestingState(object):
				1794	"""Holds states related to parsing braces."""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1795
				1796	def __init__(self):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1797	# Stack for tracking all braces. An object is pushed whenever we
				1798	# see a "{", and popped when we see a "}". Only 3 types of
				1799	# objects are possible:
				1800	# - _ClassInfo: a class or struct.
				1801	# - _NamespaceInfo: a namespace.
				1802	# - _BlockInfo: some other type of block.
				1803	self.stack = []
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	1804
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1805	# Stack of _PreprocessorInfo objects.
				1806	self.pp_stack = []
				1807
				1808	def SeenOpenBrace(self):
				1809	"""Check if we have seen the opening brace for the innermost block.
				1810
				1811	Returns:
				1812	True if we have seen the opening brace, False if the innermost
				1813	block is still expecting an opening brace.
				1814	"""
				1815	return (not self.stack) or self.stack[-1].seen_open_brace
				1816
				1817	def InNamespaceBody(self):
				1818	"""Check if we are currently one level inside a namespace body.
				1819
				1820	Returns:
				1821	True if top of the stack is a namespace block, False otherwise.
				1822	"""
				1823	return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
				1824
				1825	def UpdatePreprocessor(self, line):
				1826	"""Update preprocessor stack.
				1827
				1828	We need to handle preprocessors due to classes like this:
				1829	#ifdef SWIG
				1830	struct ResultDetailsPageElementExtensionPoint {
				1831	#else
				1832	struct ResultDetailsPageElementExtensionPoint : public Extension {
				1833	#endif
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1834
				1835	We make the following assumptions (good enough for most files):
				1836	- Preprocessor condition evaluates to true from #if up to first
				1837	#else/#elif/#endif.
				1838
				1839	- Preprocessor condition evaluates to false from #else/#elif up
				1840	to #endif. We still perform lint checks on these lines, but
				1841	these do not affect nesting stack.
				1842
				1843	Args:
				1844	line: current line to check.
				1845	"""
				1846	if Match(r'^\s#\s(if\|ifdef\|ifndef)\b', line):
				1847	# Beginning of #if block, save the nesting stack here. The saved
				1848	# stack will allow us to restore the parsing state in the #else case.
				1849	self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
				1850	elif Match(r'^\s#\s(else\|elif)\b', line):
				1851	# Beginning of #else block
				1852	if self.pp_stack:
				1853	if not self.pp_stack[-1].seen_else:
				1854	# This is the first #else or #elif block. Remember the
				1855	# whole nesting stack up to this point. This is what we
				1856	# keep after the #endif.
				1857	self.pp_stack[-1].seen_else = True
				1858	self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
				1859
				1860	# Restore the stack to how it was before the #if
				1861	self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
				1862	else:
				1863	# TODO(unknown): unexpected #else, issue warning?
				1864	pass
				1865	elif Match(r'^\s#\sendif\b', line):
				1866	# End of #if or #else blocks.
				1867	if self.pp_stack:
				1868	# If we saw an #else, we will need to restore the nesting
				1869	# stack to its former state before the #else, otherwise we
				1870	# will just continue from where we left off.
				1871	if self.pp_stack[-1].seen_else:
				1872	# Here we can just use a shallow copy since we are the last
				1873	# reference to it.
				1874	self.stack = self.pp_stack[-1].stack_before_else
				1875	# Drop the corresponding #if
				1876	self.pp_stack.pop()
				1877	else:
				1878	# TODO(unknown): unexpected #endif, issue warning?
				1879	pass
				1880
				1881	def Update(self, filename, clean_lines, linenum, error):
				1882	"""Update nesting state with current line.
				1883
				1884	Args:
				1885	filename: The name of the current file.
				1886	clean_lines: A CleansedLines instance containing the file.
				1887	linenum: The number of the line to check.
				1888	error: The function to call with any errors found.
				1889	"""
				1890	line = clean_lines.elided[linenum]
				1891
				1892	# Update pp_stack first
				1893	self.UpdatePreprocessor(line)
				1894
				1895	# Count parentheses. This is to avoid adding struct arguments to
				1896	# the nesting stack.
				1897	if self.stack:
				1898	inner_block = self.stack[-1]
				1899	depth_change = line.count('(') - line.count(')')
				1900	inner_block.open_parentheses += depth_change
				1901
				1902	# Also check if we are starting or ending an inline assembly block.
				1903	if inner_block.inline_asm in (_NO_ASM, _END_ASM):
				1904	if (depth_change != 0 and
				1905	inner_block.open_parentheses == 1 and
				1906	_MATCH_ASM.match(line)):
				1907	# Enter assembly block
				1908	inner_block.inline_asm = _INSIDE_ASM
				1909	else:
				1910	# Not entering assembly block. If previous line was _END_ASM,
				1911	# we will now shift to _NO_ASM state.
				1912	inner_block.inline_asm = _NO_ASM
				1913	elif (inner_block.inline_asm == _INSIDE_ASM and
				1914	inner_block.open_parentheses == 0):
				1915	# Exit assembly block
				1916	inner_block.inline_asm = _END_ASM
				1917
				1918	# Consume namespace declaration at the beginning of the line. Do
				1919	# this in a loop so that we catch same line declarations like this:
				1920	# namespace proto2 { namespace bridge { class MessageSet; } }
				1921	while True:
				1922	# Match start of namespace. The "\b\s*" below catches namespace
				1923	# declarations even if it weren't followed by a whitespace, this
				1924	# is so that we don't confuse our namespace checker. The
				1925	# missing spaces will be flagged by CheckSpacing.
				1926	namespace_decl_match = Match(r'^\snamespace\b\s([:\w]+)?(.*)$', line)
				1927	if not namespace_decl_match:
				1928	break
				1929
				1930	new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
				1931	self.stack.append(new_namespace)
				1932
				1933	line = namespace_decl_match.group(2)
				1934	if line.find('{') != -1:
				1935	new_namespace.seen_open_brace = True
				1936	line = line[line.find('{') + 1:]
				1937
				1938	# Look for a class declaration in whatever is left of the line
				1939	# after parsing namespaces. The regexp accounts for decorated classes
				1940	# such as in:
				1941	# class LOCKABLE API Object {
				1942	# };
				1943	#
				1944	# Templates with class arguments may confuse the parser, for example:
				1945	# template <class T
				1946	# class Comparator = less<T>,
				1947	# class Vector = vector<T> >
				1948	# class HeapQueue {
				1949	#
				1950	# Because this parser has no nesting state about templates, by the
				1951	# time it saw "class Comparator", it may think that it's a new class.
				1952	# Nested templates have a similar problem:
				1953	# template <
				1954	# typename ExportedType,
				1955	# typename TupleType,
				1956	# template <typename, typename> class ImplTemplate>
				1957	#
				1958	# To avoid these cases, we ignore classes that are followed by '=' or '>'
				1959	class_decl_match = Match(
				1960	r'\s(template\s<[\w\s<>,:]>\s)?'
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1961	r'(class\|struct)\s+([A-Z_]+\s+)(\w+(?:::\w+))'
				1962	r'(([^=>]\|<[^<>]>\|<[^<>]<[^<>]>\s>)*)$', line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1963	if (class_decl_match and
				1964	(not self.stack or self.stack[-1].open_parentheses == 0)):
				1965	self.stack.append(_ClassInfo(
				1966	class_decl_match.group(4), class_decl_match.group(2),
				1967	clean_lines, linenum))
				1968	line = class_decl_match.group(5)
				1969
				1970	# If we have not yet seen the opening brace for the innermost block,
				1971	# run checks here.
				1972	if not self.SeenOpenBrace():
				1973	self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
				1974
				1975	# Update access control if we are inside a class/struct
				1976	if self.stack and isinstance(self.stack[-1], _ClassInfo):
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1977	classinfo = self.stack[-1]
				1978	access_match = Match(
				1979	r'^(.)\b(public\|private\|protected\|signals)(\s+(?:slots\s)?)?'
				1980	r':(?:[^:]\|$)',
				1981	line)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	1982	if access_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1983	classinfo.access = access_match.group(2)
				1984
				1985	# Check that access keywords are indented +1 space. Skip this
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	1986	# check if the keywords are not preceded by whitespaces.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	1987	indent = access_match.group(1)
				1988	if (len(indent) != classinfo.class_indent + 1 and
				1989	Match(r'^\s*$', indent)):
				1990	if classinfo.is_struct:
				1991	parent = 'struct ' + classinfo.name
				1992	else:
				1993	parent = 'class ' + classinfo.name
				1994	slots = ''
				1995	if access_match.group(3):
				1996	slots = access_match.group(3)
				1997	error(filename, linenum, 'whitespace/indent', 3,
				1998	'%s%s: should be indented +1 space inside %s' % (
				1999	access_match.group(2), slots, parent))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2000
				2001	# Consume braces or semicolons from what's left of the line
				2002	while True:
				2003	# Match first brace, semicolon, or closed parenthesis.
				2004	matched = Match(r'^[^{;)}]([{;)}])(.)$', line)
				2005	if not matched:
				2006	break
				2007
				2008	token = matched.group(1)
				2009	if token == '{':
				2010	# If namespace or class hasn't seen a opening brace yet, mark
				2011	# namespace/class head as complete. Push a new block onto the
				2012	# stack otherwise.
				2013	if not self.SeenOpenBrace():
				2014	self.stack[-1].seen_open_brace = True
				2015	else:
				2016	self.stack.append(_BlockInfo(True))
				2017	if _MATCH_ASM.match(line):
				2018	self.stack[-1].inline_asm = _BLOCK_ASM
				2019	elif token == ';' or token == ')':
				2020	# If we haven't seen an opening brace yet, but we already saw
				2021	# a semicolon, this is probably a forward declaration. Pop
				2022	# the stack for these.
				2023	#
				2024	# Similarly, if we haven't seen an opening brace yet, but we
				2025	# already saw a closing parenthesis, then these are probably
				2026	# function arguments with extra "class" or "struct" keywords.
				2027	# Also pop these stack for these.
				2028	if not self.SeenOpenBrace():
				2029	self.stack.pop()
				2030	else: # token == '}'
				2031	# Perform end of block checks and pop the stack.
				2032	if self.stack:
				2033	self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
				2034	self.stack.pop()
				2035	line = matched.group(2)
				2036
				2037	def InnermostClass(self):
				2038	"""Get class info on the top of the stack.
				2039
				2040	Returns:
				2041	A _ClassInfo object if we are inside a class, or None otherwise.
				2042	"""
				2043	for i in range(len(self.stack), 0, -1):
				2044	classinfo = self.stack[i - 1]
				2045	if isinstance(classinfo, _ClassInfo):
				2046	return classinfo
				2047	return None
				2048
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2049	def CheckCompletedBlocks(self, filename, error):
				2050	"""Checks that all classes and namespaces have been completely parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2051
				2052	Call this when all lines in a file have been processed.
				2053	Args:
				2054	filename: The name of the current file.
				2055	error: The function to call with any errors found.
				2056	"""
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2057	# Note: This test can result in false positives if #ifdef constructs
				2058	# get in the way of brace matching. See the testBuildClass test in
				2059	# cpplint_unittest.py for an example of this.
				2060	for obj in self.stack:
				2061	if isinstance(obj, _ClassInfo):
				2062	error(filename, obj.starting_linenum, 'build/class', 5,
				2063	'Failed to find complete declaration of class %s' %
				2064	obj.name)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2065	elif isinstance(obj, _NamespaceInfo):
				2066	error(filename, obj.starting_linenum, 'build/namespaces', 5,
				2067	'Failed to find complete declaration of namespace %s' %
				2068	obj.name)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2069
				2070
				2071	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2072	nesting_state, error):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2073	r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2074
				2075	Complain about several constructs which gcc-2 accepts, but which are
				2076	not standard C++. Warning about these in lint is one way to ease the
				2077	transition to new compilers.
				2078	- put storage class first (e.g. "static const" instead of "const static").
				2079	- "%lld" instead of %qd" in printf-type functions.
				2080	- "%1$d" is non-standard in printf-type functions.
				2081	- "\%" is an undefined character escape sequence.
				2082	- text after #endif is not allowed.
				2083	- invalid inner-style forward declaration.
				2084	- >? and <? operators, and their >?= and <?= cousins.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2085
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2086	Additionally, check for constructor/destructor style violations and reference
				2087	members, as it is very convenient to do so while checking for
				2088	gcc-2 compliance.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2089
				2090	Args:
				2091	filename: The name of the current file.
				2092	clean_lines: A CleansedLines instance containing the file.
				2093	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2094	nesting_state: A _NestingState instance which maintains information about
				2095	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2096	error: A callable to which errors are reported, which takes 4 arguments:
				2097	filename, line number, error level, and message
				2098	"""
				2099
				2100	# Remove comments from the line, but leave in strings for now.
				2101	line = clean_lines.lines[linenum]
				2102
				2103	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				2104	error(filename, linenum, 'runtime/printf_format', 3,
				2105	'%q in format strings is deprecated. Use %ll instead.')
				2106
				2107	if Search(r'printf\s\(.".*%\d+\$', line):
				2108	error(filename, linenum, 'runtime/printf_format', 2,
				2109	'%N$ formats are unconventional. Try rewriting to avoid them.')
				2110
				2111	# Remove escaped backslashes before looking for undefined escapes.
				2112	line = line.replace('\\\\', '')
				2113
				2114	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				2115	error(filename, linenum, 'build/printf_format', 3,
				2116	'%, [, (, and { are undefined character escapes. Unescape them.')
				2117
				2118	# For the rest, work with both comments and strings removed.
				2119	line = clean_lines.elided[linenum]
				2120
				2121	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				2122	r'\|float\|double\|signed\|unsigned'
				2123	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2124	r'\s+(register\|static\|extern\|typedef)\b',
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2125	line):
				2126	error(filename, linenum, 'build/storage_class', 5,
				2127	'Storage class (static, extern, typedef, etc) should be first.')
				2128
				2129	if Match(r'\s#\sendif\s*[^/\s]+', line):
				2130	error(filename, linenum, 'build/endif_comment', 5,
				2131	'Uncommented text after #endif is non-standard. Use a comment.')
				2132
				2133	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				2134	error(filename, linenum, 'build/forward_decl', 5,
				2135	'Inner-style forward declarations are invalid. Remove this line.')
				2136
				2137	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				2138	line):
				2139	error(filename, linenum, 'build/deprecated', 3,
				2140	'>? and <? (max and min) operators are non-standard and deprecated.')
				2141
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	2142	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				2143	# TODO(unknown): Could it be expanded safely to arbitrary references,
				2144	# without triggering too many false positives? The first
				2145	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				2146	# the restriction.
				2147	# Here's the original regexp, for the reference:
				2148	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				2149	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				2150	error(filename, linenum, 'runtime/member_string_references', 2,
				2151	'const string& members are dangerous. It is much better to use '
				2152	'alternatives, such as pointers or simple constants.')
				2153
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2154	# Everything else in this function operates on class declarations.
				2155	# Return early if the top of the nesting stack is not a class, or if
				2156	# the class head is not completed yet.
				2157	classinfo = nesting_state.InnermostClass()
				2158	if not classinfo or not classinfo.seen_open_brace:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2159	return
				2160
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2161	# The class may have been declared with namespace or classname qualifiers.
				2162	# The constructor and destructor will not have those qualifiers.
				2163	base_classname = classinfo.name.split('::')[-1]
				2164
				2165	# Look for single-argument constructors that aren't marked explicit.
				2166	# Technically a valid construct, but against style.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2167	args = Match(r'\s+(?:inline\s+)?%s\s*$([^,()]+)$'
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2168	% re.escape(base_classname),
				2169	line)
				2170	if (args and
				2171	args.group(1) != 'void' and
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	2172	not Match(r'(const\s+)?%s(\s+const)?\s(?:<\w+>\s)?&'
				2173	% re.escape(base_classname), args.group(1).strip())):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2174	error(filename, linenum, 'runtime/explicit', 5,
				2175	'Single-argument constructors should be marked explicit.')
				2176
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2177
				2178	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				2179	"""Checks for the correctness of various spacing around function calls.
				2180
				2181	Args:
				2182	filename: The name of the current file.
				2183	line: The text of the line to check.
				2184	linenum: The number of the line to check.
				2185	error: The function to call with any errors found.
				2186	"""
				2187
				2188	# Since function calls often occur inside if/for/while/switch
				2189	# expressions - which have their own, more liberal conventions - we
				2190	# first see if we should be looking inside such an expression for a
				2191	# function call, to which we can apply more strict standards.
				2192	fncall = line # if there's no control flow construct, look at whole line
				2193	for pattern in (r'\bif\s$(.)$\s*{',
				2194	r'\bfor\s$(.)$\s*{',
				2195	r'\bwhile\s$(.)$\s*[{;]',
				2196	r'\bswitch\s$(.)$\s*{'):
				2197	match = Search(pattern, line)
				2198	if match:
				2199	fncall = match.group(1) # look inside the parens for function calls
				2200	break
				2201
				2202	# Except in if/for/while/switch, there should never be space
				2203	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				2204	# for nested parens ( (a+b) + c ). Likewise, there should never be
				2205	# a space before a ( when it's a function argument. I assume it's a
				2206	# function argument when the char before the whitespace is legal in
				2207	# a function name (alnum + _) and we're not starting a macro. Also ignore
				2208	# pointers and references to arrays and functions coz they're too tricky:
				2209	# we use a very simple way to recognize these:
				2210	# " (something)(maybe-something)" or
				2211	# " (something)(maybe-something," or
				2212	# " (something)[something]"
				2213	# Note that we assume the contents of [] to be short enough that
				2214	# they'll never need to wrap.
				2215	if ( # Ignore control structures.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2216	not Search(r'\b(if\|for\|while\|switch\|return\|new\|delete\|catch\|sizeof)\b',
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2217	fncall) and
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2218	# Ignore pointers/references to functions.
				2219	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				2220	# Ignore pointers/references to arrays.
				2221	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2222	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2223	error(filename, linenum, 'whitespace/parens', 4,
				2224	'Extra space after ( in function call')
erg@google.com	3664910	2009-03-25 21:18:36 +0000	[diff] [blame]	2225	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2226	error(filename, linenum, 'whitespace/parens', 2,
				2227	'Extra space after (')
				2228	if (Search(r'\w\s+\(', fncall) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2229	not Search(r'#\s*define\|typedef', fncall) and
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2230	not Search(r'\w\s+$(\w+::)\\w+$\(', fncall)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2231	error(filename, linenum, 'whitespace/parens', 4,
				2232	'Extra space before ( in function call')
				2233	# If the ) is followed only by a newline or a { + newline, assume it's
				2234	# part of a control statement (if/while/etc), and don't complain
				2235	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2236	# If the closing parenthesis is preceded by only whitespaces,
				2237	# try to give a more descriptive error message.
				2238	if Search(r'^\s+\)', fncall):
				2239	error(filename, linenum, 'whitespace/parens', 2,
				2240	'Closing ) should be moved to the previous line')
				2241	else:
				2242	error(filename, linenum, 'whitespace/parens', 2,
				2243	'Extra space before )')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2244
				2245
				2246	def IsBlankLine(line):
				2247	"""Returns true if the given line is blank.
				2248
				2249	We consider a line to be blank if the line is empty or consists of
				2250	only white spaces.
				2251
				2252	Args:
				2253	line: A line of a string.
				2254
				2255	Returns:
				2256	True, if the given line is blank.
				2257	"""
				2258	return not line or line.isspace()
				2259
				2260
				2261	def CheckForFunctionLengths(filename, clean_lines, linenum,
				2262	function_state, error):
				2263	"""Reports for long function bodies.
				2264
				2265	For an overview why this is done, see:
				2266	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				2267
				2268	Uses a simplistic algorithm assuming other style guidelines
				2269	(especially spacing) are followed.
				2270	Only checks unindented functions, so class members are unchecked.
				2271	Trivial bodies are unchecked, so constructors with huge initializer lists
				2272	may be missed.
				2273	Blank/comment lines are not counted so as to avoid encouraging the removal
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2274	of vertical space and comments just to get through a lint check.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2275	NOLINT on the last line of a function disables this check.
				2276
				2277	Args:
				2278	filename: The name of the current file.
				2279	clean_lines: A CleansedLines instance containing the file.
				2280	linenum: The number of the line to check.
				2281	function_state: Current function name and lines in body so far.
				2282	error: The function to call with any errors found.
				2283	"""
				2284	lines = clean_lines.lines
				2285	line = lines[linenum]
				2286	raw = clean_lines.raw_lines
				2287	raw_line = raw[linenum]
				2288	joined_line = ''
				2289
				2290	starting_func = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2291	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2292	match_result = Match(regexp, line)
				2293	if match_result:
				2294	# If the name is all caps and underscores, figure it's a macro and
				2295	# ignore it, unless it's TEST or TEST_F.
				2296	function_name = match_result.group(1).split()[-1]
				2297	if function_name == 'TEST' or function_name == 'TEST_F' or (
				2298	not Match(r'[A-Z_]+$', function_name)):
				2299	starting_func = True
				2300
				2301	if starting_func:
				2302	body_found = False
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2303	for start_linenum in xrange(linenum, clean_lines.NumLines()):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2304	start_line = lines[start_linenum]
				2305	joined_line += ' ' + start_line.lstrip()
				2306	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				2307	body_found = True
				2308	break # ... ignore
				2309	elif Search(r'{', start_line):
				2310	body_found = True
				2311	function = Search(r'((\w\|:)*)\(', line).group(1)
				2312	if Match(r'TEST', function): # Handle TEST... macros
				2313	parameter_regexp = Search(r'($.*$)', joined_line)
				2314	if parameter_regexp: # Ignore bad syntax
				2315	function += parameter_regexp.group(1)
				2316	else:
				2317	function += '()'
				2318	function_state.Begin(function)
				2319	break
				2320	if not body_found:
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	2321	# No body for the function (or evidence of a non-function) was found.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2322	error(filename, linenum, 'readability/fn_size', 5,
				2323	'Lint failed to find start of function body.')
				2324	elif Match(r'^\}\s*$', line): # function end
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	2325	function_state.Check(error, filename, linenum)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2326	function_state.End()
				2327	elif not Match(r'^\s*$', line):
				2328	function_state.Count() # Count non-blank/non-comment lines.
				2329
				2330
				2331	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				2332
				2333
				2334	def CheckComment(comment, filename, linenum, error):
				2335	"""Checks for common mistakes in TODO comments.
				2336
				2337	Args:
				2338	comment: The text of the comment from the line in question.
				2339	filename: The name of the current file.
				2340	linenum: The number of the line to check.
				2341	error: The function to call with any errors found.
				2342	"""
				2343	match = _RE_PATTERN_TODO.match(comment)
				2344	if match:
				2345	# One whitespace is correct; zero whitespace is handled elsewhere.
				2346	leading_whitespace = match.group(1)
				2347	if len(leading_whitespace) > 1:
				2348	error(filename, linenum, 'whitespace/todo', 2,
				2349	'Too many spaces before TODO')
				2350
				2351	username = match.group(2)
				2352	if not username:
				2353	error(filename, linenum, 'readability/todo', 2,
				2354	'Missing username in TODO; it should look like '
				2355	'"// TODO(my_username): Stuff."')
				2356
				2357	middle_whitespace = match.group(3)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2358	# Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2359	if middle_whitespace != ' ' and middle_whitespace != '':
				2360	error(filename, linenum, 'whitespace/todo', 2,
				2361	'TODO(my_username) should be followed by a space')
				2362
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks for improper use of DISALLOW* macros.

  Reports a DISALLOW_COPY_AND_ASSIGN, DISALLOW_EVIL_CONSTRUCTORS or
  DISALLOW_IMPLICIT_CONSTRUCTORS macro that starts the line while the
  innermost block is a class whose current access is not 'private'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                   r'DISALLOW_EVIL_CONSTRUCTORS|'
                   r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not matched:
    return
  if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
    if nesting_state.stack[-1].access != 'private':
      error(filename, linenum, 'readability/constructors', 3,
            '%s must be in the private: section' % matched.group(1))

  else:
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration. We could issue a warning here, but it
    # probably resulted in a compiler error already.
    pass
				2392
				2393
				2394	def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
				2395	"""Find the corresponding > to close a template.
				2396
				2397	Args:
				2398	clean_lines: A CleansedLines instance containing the file.
				2399	linenum: Current line number.
				2400	init_suffix: Remainder of the current line after the initial <.
				2401
				2402	Returns:
				2403	True if a matching bracket exists.
				2404	"""
				2405	line = init_suffix
				2406	nesting_stack = ['<']
				2407	while True:
				2408	# Find the next operator that can tell us whether < is used as an
				2409	# opening bracket or as a less-than operator. We only want to
				2410	# warn on the latter case.
				2411	#
				2412	# We could also check all other operators and terminate the search
				2413	# early, e.g. if we got something like this "a<b+c", the "<" is
				2414	# most likely a less-than operator, but then we will get false
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2415	# positives for default arguments and other template expressions.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2416	match = Search(r'^[^<>(),;\[\]]([<>(),;\[\]])(.)$', line)
				2417	if match:
				2418	# Found an operator, update nesting stack
				2419	operator = match.group(1)
				2420	line = match.group(2)
				2421
				2422	if nesting_stack[-1] == '<':
				2423	# Expecting closing angle bracket
				2424	if operator in ('<', '(', '['):
				2425	nesting_stack.append(operator)
				2426	elif operator == '>':
				2427	nesting_stack.pop()
				2428	if not nesting_stack:
				2429	# Found matching angle bracket
				2430	return True
				2431	elif operator == ',':
				2432	# Got a comma after a bracket, this is most likely a template
				2433	# argument. We have not seen a closing angle bracket yet, but
				2434	# it's probably a few lines later if we look for it, so just
				2435	# return early here.
				2436	return True
				2437	else:
				2438	# Got some other operator.
				2439	return False
				2440
				2441	else:
				2442	# Expecting closing parenthesis or closing bracket
				2443	if operator in ('<', '(', '['):
				2444	nesting_stack.append(operator)
				2445	elif operator in (')', ']'):
				2446	# We don't bother checking for matching () or []. If we got
				2447	# something like (] or [), it would have been a syntax error.
				2448	nesting_stack.pop()
				2449
				2450	else:
				2451	# Scan the next line
				2452	linenum += 1
				2453	if linenum >= len(clean_lines.elided):
				2454	break
				2455	line = clean_lines.elided[linenum]
				2456
				2457	# Exhausted all remaining lines and still no matching angle bracket.
				2458	# Most likely the input was incomplete, otherwise we should have
				2459	# seen a semicolon and returned early.
				2460	return True
				2461
				2462
				2463	def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
				2464	"""Find the corresponding < that started a template.
				2465
				2466	Args:
				2467	clean_lines: A CleansedLines instance containing the file.
				2468	linenum: Current line number.
				2469	init_prefix: Part of the current line before the initial >.
				2470
				2471	Returns:
				2472	True if a matching bracket exists.
				2473	"""
				2474	line = init_prefix
				2475	nesting_stack = ['>']
				2476	while True:
				2477	# Find the previous operator
				2478	match = Search(r'^(.)([<>(),;\[\]])[^<>(),;\[\]]$', line)
				2479	if match:
				2480	# Found an operator, update nesting stack
				2481	operator = match.group(2)
				2482	line = match.group(1)
				2483
				2484	if nesting_stack[-1] == '>':
				2485	# Expecting opening angle bracket
				2486	if operator in ('>', ')', ']'):
				2487	nesting_stack.append(operator)
				2488	elif operator == '<':
				2489	nesting_stack.pop()
				2490	if not nesting_stack:
				2491	# Found matching angle bracket
				2492	return True
				2493	elif operator == ',':
				2494	# Got a comma before a bracket, this is most likely a
				2495	# template argument. The opening angle bracket is probably
				2496	# there if we look for it, so just return early here.
				2497	return True
				2498	else:
				2499	# Got some other operator.
				2500	return False
				2501
				2502	else:
				2503	# Expecting opening parenthesis or opening bracket
				2504	if operator in ('>', ')', ']'):
				2505	nesting_stack.append(operator)
				2506	elif operator in ('(', '['):
				2507	nesting_stack.pop()
				2508
				2509	else:
				2510	# Scan the previous line
				2511	linenum -= 1
				2512	if linenum < 0:
				2513	break
				2514	line = clean_lines.elided[linenum]
				2515
				2516	# Exhausted all earlier lines and still no matching angle bracket.
				2517	return False
				2518
				2519
				2520	def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2521	"""Checks for the correctness of various spacing issues in the code.
				2522
				2523	Things we check for: spaces around operators, spaces after
				2524	if/for/while/switch, no spaces around parens in function calls, two
				2525	spaces between code and comment, don't start a block with a blank
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2526	line, don't end a function with a blank line, don't add a blank line
				2527	after public/protected/private, don't have too many blank lines in a row.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2528
				2529	Args:
				2530	filename: The name of the current file.
				2531	clean_lines: A CleansedLines instance containing the file.
				2532	linenum: The number of the line to check.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2533	nesting_state: A _NestingState instance which maintains information about
				2534	the current stack of nested blocks being parsed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2535	error: The function to call with any errors found.
				2536	"""
				2537
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2538	# Don't use "elided" lines here, otherwise we can't check commented lines.
				2539	# Don't want to use "raw" either, because we don't want to check inside C++11
				2540	# raw strings,
				2541	raw = clean_lines.lines_without_raw_strings
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2542	line = raw[linenum]
				2543
				2544	# Before nixing comments, check if the line is blank for no good
				2545	# reason. This includes the first line after a block is opened, and
				2546	# blank lines at the end of a function (ie, right before a line like '}'
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2547	#
				2548	# Skip all the blank line checks if we are immediately inside a
				2549	# namespace body. In other words, don't issue blank line warnings
				2550	# for this block:
				2551	# namespace {
				2552	#
				2553	# }
				2554	#
				2555	# A warning about missing end of namespace comments will be issued instead.
				2556	if IsBlankLine(line) and not nesting_state.InNamespaceBody():
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2557	elided = clean_lines.elided
				2558	prev_line = elided[linenum - 1]
				2559	prevbrace = prev_line.rfind('{')
				2560	# TODO(unknown): Don't complain if line before blank line, and line after,
				2561	# both start with alnums and are indented the same amount.
				2562	# This ignores whitespace at the start of a namespace block
				2563	# because those are not usually indented.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2564	if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2565	# OK, we have a blank line at the start of a code block. Before we
				2566	# complain, we check if it is an exception to the rule: The previous
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2567	# non-empty line has the parameters of a function header that are indented
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2568	# 4 spaces (because they did not fit in a 80 column line when placed on
				2569	# the same line as the function name). We also check for the case where
				2570	# the previous line is indented 6 spaces, which may happen when the
				2571	# initializers of a constructor do not fit into a 80 column line.
				2572	exception = False
				2573	if Match(r' {6}\w', prev_line): # Initializer list?
				2574	# We are looking for the opening column of initializer list, which
				2575	# should be indented 4 spaces to cause 6 space indentation afterwards.
				2576	search_position = linenum-2
				2577	while (search_position >= 0
				2578	and Match(r' {6}\w', elided[search_position])):
				2579	search_position -= 1
				2580	exception = (search_position >= 0
				2581	and elided[search_position][:5] == ' :')
				2582	else:
				2583	# Search for the function arguments or an initializer list. We use a
				2584	# simple heuristic here: If the line is indented 4 spaces; and we have a
				2585	# closing paren, without the opening paren, followed by an opening brace
				2586	# or colon (for initializer lists) we assume that it is the last line of
				2587	# a function header. If we have a colon indented 4 spaces, it is an
				2588	# initializer list.
				2589	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				2590	prev_line)
				2591	or Match(r' {4}:', prev_line))
				2592
				2593	if not exception:
				2594	error(filename, linenum, 'whitespace/blank_line', 2,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2595	'Redundant blank line at the start of a code block '
				2596	'should be deleted.')
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2597	# Ignore blank lines at the end of a block in a long if-else
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2598	# chain, like this:
				2599	# if (condition1) {
				2600	# // Something followed by a blank line
				2601	#
				2602	# } else if (condition2) {
				2603	# // Something else
				2604	# }
				2605	if linenum + 1 < clean_lines.NumLines():
				2606	next_line = raw[linenum + 1]
				2607	if (next_line
				2608	and Match(r'\s*}', next_line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2609	and next_line.find('} else ') == -1):
				2610	error(filename, linenum, 'whitespace/blank_line', 3,
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2611	'Redundant blank line at the end of a code block '
				2612	'should be deleted.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2613
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2614	matched = Match(r'\s*(public\|protected\|private):', prev_line)
				2615	if matched:
				2616	error(filename, linenum, 'whitespace/blank_line', 3,
				2617	'Do not leave a blank line after "%s:"' % matched.group(1))
				2618
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2619	# Next, we complain if there's a comment too near the text
				2620	commentpos = line.find('//')
				2621	if commentpos != -1:
				2622	# Check if the // may be in quotes. If so, ignore it
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2623	# Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2624	if (line.count('"', 0, commentpos) -
				2625	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				2626	# Allow one space for new scopes, two spaces otherwise:
				2627	if (not Match(r'^\s*{ //', line) and
				2628	((commentpos >= 1 and
				2629	line[commentpos-1] not in string.whitespace) or
				2630	(commentpos >= 2 and
				2631	line[commentpos-2] not in string.whitespace))):
				2632	error(filename, linenum, 'whitespace/comments', 2,
				2633	'At least two spaces is best between code and comments')
				2634	# There should always be a space between the // and the comment
				2635	commentend = commentpos + 2
				2636	if commentend < len(line) and not line[commentend] == ' ':
				2637	# but some lines are exceptions -- e.g. if they're big
				2638	# comment delimiters like:
				2639	# //----------------------------------------------------------
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2640	# or are an empty C++ style Doxygen comment, like:
				2641	# ///
erg@google.com	6d8d983	2013-10-31 19:46:18 +0000	[diff] [blame]	2642	# or C++ style Doxygen comments placed after the variable:
				2643	# ///< Header comment
				2644	# //!< Header comment
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2645	# or they begin with multiple slashes followed by a space:
				2646	# //////// Header comment
				2647	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
erg@google.com	a51c16b	2010-11-17 18:09:31 +0000	[diff] [blame]	2648	Search(r'^/$', line[commentend:]) or
erg@google.com	6d8d983	2013-10-31 19:46:18 +0000	[diff] [blame]	2649	Search(r'^!< ', line[commentend:]) or
				2650	Search(r'^/< ', line[commentend:]) or
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2651	Search(r'^/+ ', line[commentend:]))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2652	if not match:
				2653	error(filename, linenum, 'whitespace/comments', 4,
				2654	'Should have a space between // and comment')
				2655	CheckComment(line[commentpos:], filename, linenum, error)
				2656
				2657	line = clean_lines.elided[linenum] # get rid of comments and strings
				2658
				2659	# Don't try to do spacing checks for operator methods
				2660	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				2661
				2662	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				2663	# Otherwise not. Note we only check for non-spaces on both sides;
				2664	# sometimes people put non-spaces on one side when aligning ='s among
				2665	# many lines (not that this is behavior that I approve of...)
				2666	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				2667	error(filename, linenum, 'whitespace/operators', 4,
				2668	'Missing spaces around =')
				2669
				2670	# It's ok not to have spaces around binary operators like + - * /, but if
				2671	# there's too little whitespace, we get concerned. It's hard to tell,
				2672	# though, so we punt on this one for now. TODO.
				2673
				2674	# You should always have whitespace around binary operators.
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2675	#
				2676	# Check <= and >= first to avoid false positives with < and >, then
				2677	# check non-include lines for spacing around < and >.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2678	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2679	if match:
				2680	error(filename, linenum, 'whitespace/operators', 3,
				2681	'Missing spaces around %s' % match.group(1))
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2682	# We allow no-spaces around << when used like this: 10<<20, but
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2683	# not otherwise (particularly, not when used as streams)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2684	match = Search(r'(\S)(?:L\|UL\|ULL\|l\|ul\|ull)?<<(\S)', line)
				2685	if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
				2686	error(filename, linenum, 'whitespace/operators', 3,
				2687	'Missing spaces around <<')
				2688	elif not Match(r'#.*include', line):
				2689	# Avoid false positives on ->
				2690	reduced_line = line.replace('->', '')
				2691
				2692	# Look for < that is not surrounded by spaces. This is only
				2693	# triggered if both sides are missing spaces, even though
				2694	# technically we should flag if at least one side is missing a
				2695	# space. This is done to avoid some false positives with shifts.
				2696	match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
				2697	if (match and
				2698	not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
				2699	error(filename, linenum, 'whitespace/operators', 3,
				2700	'Missing spaces around <')
				2701
				2702	# Look for > that is not surrounded by spaces. Similar to the
				2703	# above, we only trigger if both sides are missing spaces to avoid
				2704	# false positives with shifts.
				2705	match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
				2706	if (match and
				2707	not FindPreviousMatchingAngleBracket(clean_lines, linenum,
				2708	match.group(1))):
				2709	error(filename, linenum, 'whitespace/operators', 3,
				2710	'Missing spaces around >')
				2711
				2712	# We allow no-spaces around >> for almost anything. This is because
				2713	# C++11 allows ">>" to close nested templates, which accounts for
				2714	# most cases when ">>" is not followed by a space.
				2715	#
				2716	# We still warn on ">>" followed by alpha character, because that is
				2717	# likely due to ">>" being used for right shifts, e.g.:
				2718	# value >> alpha
				2719	#
				2720	# When ">>" is used to close templates, the alphanumeric letter that
				2721	# follows would be part of an identifier, and there should still be
				2722	# a space separating the template type and the identifier.
				2723	# type<type<type>> alpha
				2724	match = Search(r'>>[a-zA-Z_]', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2725	if match:
				2726	error(filename, linenum, 'whitespace/operators', 3,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2727	'Missing spaces around >>')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2728
				2729	# There shouldn't be space around unary operators
				2730	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				2731	if match:
				2732	error(filename, linenum, 'whitespace/operators', 4,
				2733	'Extra space for operator %s' % match.group(1))
				2734
				2735	# A pet peeve of mine: no spaces after an if, while, switch, or for
				2736	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				2737	if match:
				2738	error(filename, linenum, 'whitespace/parens', 5,
				2739	'Missing space before ( in %s' % match.group(1))
				2740
				2741	# For if/for/while/switch, the left and right parens should be
				2742	# consistent about how many spaces are inside the parens, and
				2743	# there should either be zero or one spaces inside the parens.
				2744	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2745	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2746	match = Search(r'\b(if\|for\|while\|switch)\s*'
				2747	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				2748	line)
				2749	if match:
				2750	if len(match.group(2)) != len(match.group(4)):
				2751	if not (match.group(3) == ';' and
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	2752	len(match.group(2)) == 1 + len(match.group(4)) or
				2753	not match.group(2) and Search(r'\bfor\s$.; $', line)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2754	error(filename, linenum, 'whitespace/parens', 5,
				2755	'Mismatching spaces inside () in %s' % match.group(1))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2756	if len(match.group(2)) not in [0, 1]:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2757	error(filename, linenum, 'whitespace/parens', 5,
				2758	'Should have zero or one spaces inside ( and ) in %s' %
				2759	match.group(1))
				2760
				2761	# You should always have a space after a comma (either as fn arg or operator)
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2762	#
				2763	# This does not apply when the non-space character following the
				2764	# comma is another comma, since the only time when that happens is
				2765	# for empty macro arguments.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2766	#
				2767	# We run this check in two passes: first pass on elided lines to
				2768	# verify that lines contain missing whitespaces, second pass on raw
				2769	# lines to confirm that those missing whitespaces are not due to
				2770	# elided comments.
				2771	if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2772	error(filename, linenum, 'whitespace/comma', 3,
				2773	'Missing space after ,')
				2774
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	2775	# You should always have a space after a semicolon
				2776	# except for few corner cases
				2777	# TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
				2778	# space after ;
				2779	if Search(r';[^\s};\\)/]', line):
				2780	error(filename, linenum, 'whitespace/semicolon', 3,
				2781	'Missing space after ;')
				2782
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2783	# Next we will look for issues with function calls.
				2784	CheckSpacingForFunctionCall(filename, line, linenum, error)
				2785
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	2786	# Except after an opening paren, or after another opening brace (in case of
				2787	# an initializer list, for instance), you should have spaces before your
				2788	# braces. And since you should never have braces at the beginning of a line,
				2789	# this is an easy test.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2790	match = Match(r'^(.*[^ ({]){', line)
				2791	if match:
				2792	# Try a bit harder to check for brace initialization. This
				2793	# happens in one of the following forms:
				2794	# Constructor() : initializer_list_{} { ... }
				2795	# Constructor{}.MemberFunction()
				2796	# Type variable{};
				2797	# FunctionCall(type{}, ...);
				2798	# LastArgument(..., type{});
				2799	# LOG(INFO) << type{} << " ...";
				2800	# map_of_type[{...}] = ...;
				2801	#
				2802	# We check for the character following the closing brace, and
				2803	# silence the warning if it's one of those listed above, i.e.
				2804	# "{.;,)<]".
				2805	#
				2806	# To account for nested initializer list, we allow any number of
				2807	# closing braces up to "{;,)<". We can't simply silence the
				2808	# warning on first sight of closing brace, because that would
				2809	# cause false negatives for things that are not initializer lists.
				2810	# Silence this: But not this:
				2811	# Outer{ if (...) {
				2812	# Inner{...} if (...){ // Missing space before {
				2813	# }; }
				2814	#
				2815	# There is a false negative with this approach if people inserted
				2816	# spurious semicolons, e.g. "if (cond){};", but we will catch the
				2817	# spurious semicolon with a separate check.
				2818	(endline, endlinenum, endpos) = CloseExpression(
				2819	clean_lines, linenum, len(match.group(1)))
				2820	trailing_text = ''
				2821	if endpos > -1:
				2822	trailing_text = endline[endpos:]
				2823	for offset in xrange(endlinenum + 1,
				2824	min(endlinenum + 3, clean_lines.NumLines() - 1)):
				2825	trailing_text += clean_lines.elided[offset]
				2826	if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
				2827	error(filename, linenum, 'whitespace/braces', 5,
				2828	'Missing space before {')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2829
				2830	# Make sure '} else {' has spaces.
				2831	if Search(r'}else', line):
				2832	error(filename, linenum, 'whitespace/braces', 5,
				2833	'Missing space before else')
				2834
				2835	# You shouldn't have spaces before your brackets, except maybe after
				2836	# 'delete []' or 'new char * []'.
				2837	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				2838	error(filename, linenum, 'whitespace/braces', 5,
				2839	'Extra space before [')
				2840
				2841	# You shouldn't have a space before a semicolon at the end of the line.
				2842	# There's a special case for "for" since the style guide allows space before
				2843	# the semicolon there.
				2844	if Search(r':\s;\s$', line):
				2845	error(filename, linenum, 'whitespace/semicolon', 5,
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2846	'Semicolon defining empty statement. Use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2847	elif Search(r'^\s;\s$', line):
				2848	error(filename, linenum, 'whitespace/semicolon', 5,
				2849	'Line contains only semicolon. If this should be an empty statement, '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2850	'use {} instead.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2851	elif (Search(r'\s+;\s*$', line) and
				2852	not Search(r'\bfor\b', line)):
				2853	error(filename, linenum, 'whitespace/semicolon', 5,
				2854	'Extra space before last semicolon. If this should be an empty '
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2855	'statement, use {} instead.')
				2856
				2857	# In range-based for, we wanted spaces before and after the colon, but
				2858	# not around "::" tokens that might appear.
				2859	if (Search('for \(.[^:]:[^: ]', line) or
				2860	Search('for \(.[^: ]:[^:]', line)):
				2861	error(filename, linenum, 'whitespace/forcolon', 2,
				2862	'Missing space around colon in range-based for loop')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2863
				2864
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is that an access specifier
  ("public:", "protected:" or "private:") is preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object; its starting_linenum and last_line
      attributes are read.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line.  This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.starting_linenum <= 24 or
      linenum <= class_info.starting_linenum):
    return

  # NOTE: the alternation must use a bare '|'.  An escaped '\|' matches a
  # literal pipe character and would silently disable this whole check.
  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not matched:
    return

  # Issue warning if the line before public/protected/private was
  # not a blank line, but don't do this if the previous line contains
  # "class" or "struct".  This can happen two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # Also ignores cases where the previous line ends with a backslash as can be
  # common when defining classes in C macros.
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # Try a bit harder to find the beginning of the class.  This is to
  # account for multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  end_class_head = class_info.starting_linenum
  for i in range(class_info.starting_linenum, linenum):
    if Search(r'\{\s*$', clean_lines.lines[i]):
      end_class_head = i
      break

  # Only complain when the specifier is not the first line after the
  # opening brace of the class head.
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
				2918
				2919
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (text, index) tuple.  text is the content of the nearest non-blank
    line before linenum (taken from clean_lines.elided) and index is its
    line number.  When no such line exists, ('', -1) is returned.
  """
  # Walk upwards from the line just above linenum down to line 0.
  for candidate in range(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  return ('', -1)
				2941
				2942
				2943	def CheckBraces(filename, clean_lines, linenum, error):
				2944	"""Looks for misplaced braces (e.g. at the end of line).
				2945
				2946	Args:
				2947	filename: The name of the current file.
				2948	clean_lines: A CleansedLines instance containing the file.
				2949	linenum: The number of the line to check.
				2950	error: The function to call with any errors found.
				2951	"""
				2952
				2953	line = clean_lines.elided[linenum] # get rid of comments and strings
				2954
				2955	if Match(r'\s{\s$', line):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2956	# We allow an open brace to start a line in the case where someone is using
				2957	# braces in a block to explicitly create a new scope, which is commonly used
				2958	# to control the lifetime of stack-allocated variables. Braces are also
				2959	# used for brace initializers inside function calls. We don't detect this
				2960	# perfectly: we just don't complain if the last non-whitespace character on
				2961	# the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	2962	# previous line starts a preprocessor block.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2963	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	2964	if (not Search(r'[,;:}{(]\s*$', prevline) and
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	2965	not Match(r'\s*#', prevline)):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	2966	error(filename, linenum, 'whitespace/braces', 4,
				2967	'{ should almost always be at the end of the previous line')
				2968
				2969	# An else clause should be on the same line as the preceding closing brace.
				2970	if Match(r'\selse\s', line):
				2971	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2972	if Match(r'\s}\s$', prevline):
				2973	error(filename, linenum, 'whitespace/newline', 4,
				2974	'An else should appear on the same line as the preceding }')
				2975
				2976	# If braces come on one side of an else, they should be on both.
				2977	# However, we have to worry about "else if" that spans multiple lines!
				2978	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				2979	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				2980	# find the ( after the if
				2981	pos = line.find('else if')
				2982	pos = line.find('(', pos)
				2983	if pos > 0:
				2984	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				2985	if endline[endpos:].find('{') == -1: # must be brace after if
				2986	error(filename, linenum, 'readability/braces', 5,
				2987	'If an else has a brace on one side, it should have it on both')
				2988	else: # common case: else not followed by a multi-line if
				2989	error(filename, linenum, 'readability/braces', 5,
				2990	'If an else has a brace on one side, it should have it on both')
				2991
				2992	# Likewise, an else should never have the else clause on the same line
				2993	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				2994	error(filename, linenum, 'whitespace/newline', 4,
				2995	'Else clause should never be on same line as else (use 2 lines)')
				2996
				2997	# In the same way, a do/while should never be on one line
				2998	if Match(r'\s*do [^\s{]', line):
				2999	error(filename, linenum, 'whitespace/newline', 4,
				3000	'do/while clauses should not be on a single line')
				3001
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3002	# Block bodies should not be followed by a semicolon. Due to C++11
				3003	# brace initialization, there are more places where semicolons are
				3004	# required than not, so we use a whitelist approach to check these
				3005	# rather than a blacklist. These are the places where "};" should
				3006	# be replaced by just "}":
				3007	# 1. Some flavor of block following closing parenthesis:
				3008	# for (;;) {};
				3009	# while (...) {};
				3010	# switch (...) {};
				3011	# Function(...) {};
				3012	# if (...) {};
				3013	# if (...) else if (...) {};
				3014	#
				3015	# 2. else block:
				3016	# if (...) else {};
				3017	#
				3018	# 3. const member function:
				3019	# Function(...) const {};
				3020	#
				3021	# 4. Block following some statement:
				3022	# x = 42;
				3023	# {};
				3024	#
				3025	# 5. Block at the beginning of a function:
				3026	# Function(...) {
				3027	# {};
				3028	# }
				3029	#
				3030	# Note that naively checking for the preceding "{" will also match
				3031	# braces inside multi-dimensional arrays, but this is fine since
				3032	# that expression will not contain semicolons.
				3033	#
				3034	# 6. Block following another block:
				3035	# while (true) {}
				3036	# {};
				3037	#
				3038	# 7. End of namespaces:
				3039	# namespace {};
				3040	#
				3041	# These semicolons seems far more common than other kinds of
				3042	# redundant semicolons, possibly due to people converting classes
				3043	# to namespaces. For now we do not warn for this case.
				3044	#
				3045	# Try matching case 1 first.
				3046	match = Match(r'^(.\)\s)\{', line)
				3047	if match:
				3048	# Matched closing parenthesis (case 1). Check the token before the
				3049	# matching opening parenthesis, and don't warn if it looks like a
				3050	# macro. This avoids these false positives:
				3051	# - macro that defines a base class
				3052	# - multi-line macro that defines a base class
				3053	# - macro that defines the whole class-head
				3054	#
				3055	# But we still issue warnings for macros that we know are safe to
				3056	# warn, specifically:
				3057	# - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
				3058	# - TYPED_TEST
				3059	# - INTERFACE_DEF
				3060	# - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
				3061	#
				3062	# We implement a whitelist of safe macros instead of a blacklist of
				3063	# unsafe macros, even though the latter appears less frequently in
				3064	# google code and would have been easier to implement. This is because
				3065	# the downside for getting the whitelist wrong means some extra
				3066	# semicolons, while the downside for getting the blacklist wrong
				3067	# would result in compile errors.
				3068	#
				3069	# In addition to macros, we also don't want to warn on compound
				3070	# literals.
				3071	closing_brace_pos = match.group(1).rfind(')')
				3072	opening_parenthesis = ReverseCloseExpression(
				3073	clean_lines, linenum, closing_brace_pos)
				3074	if opening_parenthesis[2] > -1:
				3075	line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
				3076	macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
				3077	if ((macro and
				3078	macro.group(1) not in (
				3079	'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
				3080	'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
				3081	'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
				3082	Search(r'\s+=\s*$', line_prefix)):
				3083	match = None
				3084
				3085	else:
				3086	# Try matching cases 2-3.
				3087	match = Match(r'^(.(?:else\|\)\sconst)\s*)\{', line)
				3088	if not match:
				3089	# Try matching cases 4-6. These are always matched on separate lines.
				3090	#
				3091	# Note that we can't simply concatenate the previous line to the
				3092	# current line and do a single match, otherwise we may output
				3093	# duplicate warnings for the blank line case:
				3094	# if (cond) {
				3095	# // blank line
				3096	# }
				3097	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				3098	if prevline and Search(r'[;{}]\s*$', prevline):
				3099	match = Match(r'^(\s*)\{', line)
				3100
				3101	# Check matching closing brace
				3102	if match:
				3103	(endline, endlinenum, endpos) = CloseExpression(
				3104	clean_lines, linenum, len(match.group(1)))
				3105	if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
				3106	# Current {} pair is eligible for semicolon check, and we have found
				3107	# the redundant semicolon, output warning here.
				3108	#
				3109	# Note: because we are scanning forward for opening braces, and
				3110	# outputting warnings for the matching closing brace, if there are
				3111	# nested blocks with trailing semicolons, we will get the error
				3112	# messages in reversed order.
				3113	error(filename, endlinenum, 'readability/braces', 4,
				3114	"You don't need a ; after a }")
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3115
				3116
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  A warning is reported on the line where the parenthesized condition
  closes, when the character immediately after it is ';'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Search for loop keywords at the beginning of the line.  Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if not matched:
    return

  # Find the end of the conditional expression
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Output warning if what follows the condition expression is a semicolon.
  # No warning for all other cases, including whitespace or newline, since we
  # have a separate check for semicolons preceded by whitespace.
  if end_pos >= 0 and Match(r';', end_line[end_pos:]):
    if matched.group(1) == 'if':
      error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
            'Empty conditional bodies should use {}')
    else:
      error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
            'Empty loop bodies should use {} or continue')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3150
				3151
				3152	def CheckCheck(filename, clean_lines, linenum, error):
				3153	"""Checks the use of CHECK and EXPECT macros.
				3154
				3155	Args:
				3156	filename: The name of the current file.
				3157	clean_lines: A CleansedLines instance containing the file.
				3158	linenum: The number of the line to check.
				3159	error: The function to call with any errors found.
				3160	"""
				3161
				3162	# Decide the set of replacement macros that should be suggested
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3163	lines = clean_lines.elided
				3164	check_macro = None
				3165	start_pos = -1
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3166	for macro in _CHECK_MACROS:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3167	i = lines[linenum].find(macro)
				3168	if i >= 0:
				3169	check_macro = macro
				3170
				3171	# Find opening parenthesis. Do a regular expression match here
				3172	# to make sure that we are matching the expected CHECK macro, as
				3173	# opposed to some other macro that happens to contain the CHECK
				3174	# substring.
				3175	matched = Match(r'^(.\b' + check_macro + r'\s)\(', lines[linenum])
				3176	if not matched:
				3177	continue
				3178	start_pos = len(matched.group(1))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3179	break
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3180	if not check_macro or start_pos < 0:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3181	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				3182	return
				3183
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3184	# Find end of the boolean expression by matching parentheses
				3185	(last_line, end_line, end_pos) = CloseExpression(
				3186	clean_lines, linenum, start_pos)
				3187	if end_pos < 0:
				3188	return
				3189	if linenum == end_line:
				3190	expression = lines[linenum][start_pos + 1:end_pos - 1]
				3191	else:
				3192	expression = lines[linenum][start_pos + 1:]
				3193	for i in xrange(linenum + 1, end_line):
				3194	expression += lines[i]
				3195	expression += last_line[0:end_pos - 1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3196
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3197	# Parse expression so that we can take parentheses into account.
				3198	# This avoids false positives for inputs like "CHECK((a < 4) == b)",
				3199	# which is not replaceable by CHECK_LE.
				3200	lhs = ''
				3201	rhs = ''
				3202	operator = None
				3203	while expression:
				3204	matched = Match(r'^\s(<<\|<<=\|>>\|>>=\|->\\|->\|&&\|\\|\\|\|'
				3205	r'==\|!=\|>=\|>\|<=\|<\|\()(.*)$', expression)
				3206	if matched:
				3207	token = matched.group(1)
				3208	if token == '(':
				3209	# Parenthesized operand
				3210	expression = matched.group(2)
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3211	(end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3212	if end < 0:
				3213	return # Unmatched parenthesis
				3214	lhs += '(' + expression[0:end]
				3215	expression = expression[end:]
				3216	elif token in ('&&', '\|\|'):
				3217	# Logical and/or operators. This means the expression
				3218	# contains more than one term, for example:
				3219	# CHECK(42 < a && a < b);
				3220	#
				3221	# These are not replaceable with CHECK_LE, so bail out early.
				3222	return
				3223	elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
				3224	# Non-relational operator
				3225	lhs += token
				3226	expression = matched.group(2)
				3227	else:
				3228	# Relational operator
				3229	operator = token
				3230	rhs = matched.group(2)
				3231	break
				3232	else:
				3233	# Unparenthesized operand. Instead of appending to lhs one character
				3234	# at a time, we do another regular expression match to consume several
				3235	# characters at once if possible. Trivial benchmark shows that this
				3236	# is more efficient when the operands are longer than a single
				3237	# character, which is generally the case.
				3238	matched = Match(r'^([^-=!<>()&\|]+)(.*)$', expression)
				3239	if not matched:
				3240	matched = Match(r'^(\s\S)(.)$', expression)
				3241	if not matched:
				3242	break
				3243	lhs += matched.group(1)
				3244	expression = matched.group(2)
				3245
				3246	# Only apply checks if we got all parts of the boolean expression
				3247	if not (lhs and operator and rhs):
				3248	return
				3249
				3250	# Check that rhs do not contain logical operators. We already know
				3251	# that lhs is fine since the loop above parses out && and \|\|.
				3252	if rhs.find('&&') > -1 or rhs.find('\|\|') > -1:
				3253	return
				3254
				3255	# At least one of the operands must be a constant literal. This is
				3256	# to avoid suggesting replacements for unprintable things like
				3257	# CHECK(variable != iterator)
				3258	#
				3259	# The following pattern matches decimal, hex integers, strings, and
				3260	# characters (in that order).
				3261	lhs = lhs.strip()
				3262	rhs = rhs.strip()
				3263	match_constant = r'^([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')$'
				3264	if Match(match_constant, lhs) or Match(match_constant, rhs):
				3265	# Note: since we know both lhs and rhs, we can provide a more
				3266	# descriptive error message like:
				3267	# Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
				3268	# Instead of:
				3269	# Consider using CHECK_EQ instead of CHECK(a == b)
				3270	#
				3271	# We are still keeping the less descriptive message because if lhs
				3272	# or rhs gets long, the error message might become unreadable.
				3273	error(filename, linenum, 'readability/check', 2,
				3274	'Consider using %s instead of %s(a %s b)' % (
				3275	_CHECK_REPLACEMENT[check_macro][operator],
				3276	check_macro, operator))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3277
				3278
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Reports every match of _ALT_TOKEN_REPLACEMENT_PATTERN on the line, together
  with the operator that _ALT_TOKEN_REPLACEMENT maps it to.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments.  This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives.  At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # Only the comment delimiters themselves are tested: checking for a bare
  # '/' would also skip every line that contains a division.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
				3309
				3310
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-Unicode input (a Python 2
    byte string, or bytes on Python 3) is measured with len().
  """
  # 'unicode' only exists on Python 2; on Python 3 every str is Unicode text.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that a base character followed by a combining mark
  # collapses into a single code point wherever a precomposed form exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Remaining combining marks take no column of their own.
      width += 1
  return width
				3331
				3332
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section
  # labels.  The whitespace around the label is optional so that " public:"
  # (one space, no gap before the colon) is recognized as a label.
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # NOTE(review): the #endif comment is accepted with one or two spaces
    # before '//'; two spaces is presumably the canonical form -- confirm.
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif // %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    # Lines up to 25% over the limit get a low-confidence warning; anything
    # beyond that is reported with higher confidence.
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 0,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3438
				3439
				3440	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				3441	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				3442	# Matches the first component of a filename delimited by -s and _s. That is:
				3443	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				3444	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				3445	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				3446	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				3447	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				3448
				3449
				3450	def _DropCommonSuffixes(filename):
				3451	"""Drops common suffixes like _test.cc or -inl.h from filename.
				3452
				3453	For example:
				3454	>>> _DropCommonSuffixes('foo/foo-inl.h')
				3455	'foo/foo'
				3456	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				3457	'foo/bar/foo'
				3458	>>> _DropCommonSuffixes('foo/foo_internal.h')
				3459	'foo/foo'
				3460	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				3461	'foo/foo_unusualinternal'
				3462
				3463	Args:
				3464	filename: The input filename.
				3465
				3466	Returns:
				3467	The filename with the common suffix removed.
				3468	"""
				3469	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				3470	'inl.h', 'impl.h', 'internal.h'):
				3471	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				3472	filename[-len(suffix) - 1] in ('-', '_')):
				3473	return filename[:-len(suffix) - 1]
				3474	return os.path.splitext(filename)[0]
				3475
				3476
				3477	def _IsTestFilename(filename):
				3478	"""Determines if the given filename has a suffix that identifies it as a test.
				3479
				3480	Args:
				3481	filename: The input filename.
				3482
				3483	Returns:
				3484	True if 'filename' looks like a test, False otherwise.
				3485	"""
				3486	if (filename.endswith('_test.cc') or
				3487	filename.endswith('_unittest.cc') or
				3488	filename.endswith('_regtest.cc')):
				3489	return True
				3490	else:
				3491	return False
				3492
				3493
				3494	def _ClassifyInclude(fileinfo, include, is_system):
				3495	"""Figures out what kind of header 'include' is.
				3496
				3497	Args:
				3498	fileinfo: The current file cpplint is running over. A FileInfo instance.
				3499	include: The path to a #included file.
				3500	is_system: True if the #include used <> rather than "".
				3501
				3502	Returns:
				3503	One of the _XXX_HEADER constants.
				3504
				3505	For example:
				3506	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				3507	_C_SYS_HEADER
				3508	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				3509	_CPP_SYS_HEADER
				3510	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				3511	_LIKELY_MY_HEADER
				3512	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				3513	... 'bar/foo_other_ext.h', False)
				3514	_POSSIBLE_MY_HEADER
				3515	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				3516	_OTHER_HEADER
				3517	"""
				3518	# This is a list of all standard c++ header files, except
				3519	# those already checked for above.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3520	is_cpp_h = include in _CPP_HEADERS
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3521
				3522	if is_system:
				3523	if is_cpp_h:
				3524	return _CPP_SYS_HEADER
				3525	else:
				3526	return _C_SYS_HEADER
				3527
				3528	# If the target file and the include we're checking share a
				3529	# basename when we drop common extensions, and the include
				3530	# lives in . , then it's likely to be owned by the target file.
				3531	target_dir, target_base = (
				3532	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				3533	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				3534	if target_base == include_base and (
				3535	include_dir == target_dir or
				3536	include_dir == os.path.normpath(target_dir + '/../public')):
				3537	return _LIKELY_MY_HEADER
				3538
				3539	# If the target and include share some initial basename
				3540	# component, it's possible the target is implementing the
				3541	# include, so it's allowed to be first, but we'll never
				3542	# complain if it's not there.
				3543	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				3544	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				3545	if (target_first_component and include_first_component and
				3546	target_first_component.group(0) ==
				3547	include_first_component.group(0)):
				3548	return _POSSIBLE_MY_HEADER
				3549
				3550	return _OTHER_HEADER
				3551
				3552
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	3553
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # _RE_PATTERN_INCLUDE is anchored at the start of the line, so a single
  # search serves both the duplicate/order checks and the stream check below.
  match = _RE_PATTERN_INCLUDE.search(line)
  if not match:
    return

  include = match.group(2)
  is_system = (match.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    error_message = include_state.CheckNextIncludeOrder(
        _ClassifyInclude(fileinfo, include, is_system))
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    if not include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # The prefix group is an alternation (fstream, iostream, sstream, ...); the
  # trailing empty alternative also lets the bare 'stream' header match.
  if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
    # Many unit tests use cout, so we exempt them.
    if not _IsTestFilename(filename):
      error(filename, linenum, 'readability/streams', 3,
            'Streams are highly discouraged.')
				3624
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3625
				3626	def _GetTextInside(text, start_pattern):
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3627	r"""Retrieves all the text between matching open and close parentheses.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3628
				3629	Given a string of lines and a regular expression string, retrieve all the text
				3630	following the expression and between opening punctuation symbols like
				3631	(, [, or {, and the matching close-punctuation symbol. This properly nested
				3632	occurrences of the punctuations, so for the text like
				3633	printf(a(), b(c()));
				3634	a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
				3635	start_pattern must match string having an open punctuation symbol at the end.
				3636
				3637	Args:
				3638	text: The lines to extract text. Its comments and strings must be elided.
				3639	It can be single line and can span multiple lines.
				3640	start_pattern: The regexp string indicating where to start extracting
				3641	the text.
				3642	Returns:
				3643	The extracted text.
				3644	None if either the opening string or ending punctuation could not be found.
				3645	"""
				3646	# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
				3647	# rewritten to use _GetTextInside (and use inferior regexp matching today).
				3648
				3649	# Give opening punctuations to get the matching close-punctuations.
				3650	matching_punctuation = {'(': ')', '{': '}', '[': ']'}
				3651	closing_punctuation = set(matching_punctuation.itervalues())
				3652
				3653	# Find the position to start extracting text.
				3654	match = re.search(start_pattern, text, re.M)
				3655	if not match: # start_pattern not found in text.
				3656	return None
				3657	start_position = match.end(0)
				3658
				3659	assert start_position > 0, (
				3660	'start_pattern must ends with an opening punctuation.')
				3661	assert text[start_position - 1] in matching_punctuation, (
				3662	'start_pattern must ends with an opening punctuation.')
				3663	# Stack of closing punctuations we expect to have in text after position.
				3664	punctuation_stack = [matching_punctuation[text[start_position - 1]]]
				3665	position = start_position
				3666	while punctuation_stack and position < len(text):
				3667	if text[position] == punctuation_stack[-1]:
				3668	punctuation_stack.pop()
				3669	elif text[position] in closing_punctuation:
				3670	# A closing punctuation without matching opening punctuations.
				3671	return None
				3672	elif text[position] in matching_punctuation:
				3673	punctuation_stack.append(matching_punctuation[text[position]])
				3674	position += 1
				3675	if punctuation_stack:
				3676	# Opening punctuations left without matching close-punctuations.
				3677	return None
				3678	# punctuations match.
				3679	return text[start_position:position - 1]
				3680
				3681
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3682	# Patterns for matching call-by-reference parameters.
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3683	#
				3684	# Supports nested templates up to 2 levels deep using this messy pattern:
				3685	# < (?: < (?: < [^<>]*
				3686	# >
				3687	# \| [^<>] )*
				3688	# >
				3689	# \| [^<>] )*
				3690	# >
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3691	_RE_PATTERN_IDENT = r'[_a-zA-Z]\w' # =~ [[:alpha:]][[:alnum:]]
				3692	_RE_PATTERN_TYPE = (
				3693	r'(?:const\s+)?(?:typename\s+\|class\s+\|struct\s+\|union\s+\|enum\s+)?'
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3694	r'(?:\w\|'
				3695	r'\s<(?:<(?:<[^<>]>\|[^<>])>\|[^<>])>\|'
				3696	r'::)+')
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3697	# A call-by-reference parameter ends with '& identifier'.
				3698	_RE_PATTERN_REF_PARAM = re.compile(
				3699	r'(' + _RE_PATTERN_TYPE + r'(?:\s(?:\bconst\b\|[]))\s'
				3700	r'&\s' + _RE_PATTERN_IDENT + r')\s(?:=[^,()]+)?[,)]')
				3701	# A call-by-const-reference parameter either ends with 'const& identifier'
				3702	# or looks like 'const type& identifier' when 'type' is atomic.
				3703	_RE_PATTERN_CONST_REF_PARAM = (
				3704	r'(?:.\s\bconst\s&\s' + _RE_PATTERN_IDENT +
				3705	r'\|const\s+' + _RE_PATTERN_TYPE + r'\s&\s' + _RE_PATTERN_IDENT + r')')
				3706
				3707
				3708	def CheckLanguage(filename, clean_lines, linenum, file_extension,
				3709	include_state, nesting_state, error):
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3710	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				3711
				3712	Some of these rules are hard to test (function overloading, using
				3713	uint32 inappropriately), but we do the best we can.
				3714
				3715	Args:
				3716	filename: The name of the current file.
				3717	clean_lines: A CleansedLines instance containing the file.
				3718	linenum: The number of the line to check.
				3719	file_extension: The extension (without the dot) of the filename.
				3720	include_state: An _IncludeState instance in which the headers are inserted.
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	3721	nesting_state: A _NestingState instance which maintains information about
				3722	the current stack of nested blocks being parsed.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3723	error: The function to call with any errors found.
				3724	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3725	# If the line is empty or consists of entirely a comment, no need to
				3726	# check it.
				3727	line = clean_lines.elided[linenum]
				3728	if not line:
				3729	return
				3730
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	3731	match = _RE_PATTERN_INCLUDE.search(line)
				3732	if match:
				3733	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				3734	return
				3735
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3736	# Reset include state across preprocessor directives. This is meant
				3737	# to silence warnings for conditional includes.
				3738	if Match(r'^\s#\s(?:ifdef\|elif\|else\|endif)\b', line):
				3739	include_state.ResetSection()
				3740
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3741	# Make Windows paths like Unix.
				3742	fullname = os.path.abspath(filename).replace('\\', '/')
				3743
				3744	# TODO(unknown): figure out if they're using default arguments in fn proto.
				3745
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3746	# Check to see if they're using an conversion function cast.
				3747	# I just try to capture the most common basic types, though there are more.
				3748	# Parameterless conversion functions, such as bool(), are allowed as they are
				3749	# probably a member operator declaration or default constructor.
				3750	match = Search(
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3751	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3752	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)'
				3753	r'(\([^)].*)', line)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3754	if match:
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3755	matched_new = match.group(1)
				3756	matched_type = match.group(2)
				3757	matched_funcptr = match.group(3)
				3758
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3759	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				3760	# where type may be float(), int(string), etc. Without context they are
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3761	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				3762	# MockCallback takes a template parameter of the form return_type(arg_type),
				3763	# which looks much like the cast we're trying to detect.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3764	#
				3765	# std::function<> wrapper has a similar problem.
				3766	#
				3767	# Return types for function pointers also look like casts if they
				3768	# don't have an extra space.
				3769	if (matched_new is None and # If new operator, then this isn't a cast
erg@google.com	d7d2747	2011-09-07 17:36:35 +0000	[diff] [blame]	3770	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3771	Search(r'\bMockCallback<.*>', line) or
				3772	Search(r'\bstd::function<.*>', line)) and
				3773	not (matched_funcptr and
				3774	Match(r'$(?:[^() ]+::\s\\s)?[^() ]+$\s\(',
				3775	matched_funcptr))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3776	# Try a bit harder to catch gmock lines: the only place where
				3777	# something looks like an old-style cast is where we declare the
				3778	# return type of the mocked method, and the only time when we
				3779	# are missing context is if MOCK_METHOD was split across
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3780	# multiple lines. The missing MOCK_METHOD is usually one or two
				3781	# lines back, so scan back one or two lines.
				3782	#
				3783	# It's not possible for gmock macros to appear in the first 2
				3784	# lines, since the class head + section name takes up 2 lines.
				3785	if (linenum < 2 or
				3786	not (Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s$',
				3787	clean_lines.elided[linenum - 1]) or
				3788	Match(r'^\sMOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s$',
				3789	clean_lines.elided[linenum - 2]))):
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3790	error(filename, linenum, 'readability/casting', 4,
				3791	'Using deprecated casting style. '
				3792	'Use static_cast<%s>(...) instead' %
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3793	matched_type)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3794
				3795	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3796	'static_cast',
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3797	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$', error)
				3798
				3799	# This doesn't catch all cases. Consider (const char * const)"hello".
				3800	#
				3801	# (char *) "foo" should always be a const_cast (reinterpret_cast won't
				3802	# compile).
				3803	if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3804	'const_cast', r'$(char\s?\+\s?)$\s"', error):
				3805	pass
				3806	else:
				3807	# Check pointer casts for other than string constants
				3808	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				3809	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3810
				3811	# In addition, we look for people taking the address of a cast. This
				3812	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				3813	# point where you think.
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3814	match = Search(
				3815	r'(?:&$([^)]+)$[\w(])\|'
				3816	r'(?:&(static\|dynamic\|down\|reinterpret)_cast\b)', line)
				3817	if match and match.group(1) != '*':
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3818	error(filename, linenum, 'runtime/casting', 4,
				3819	('Are you taking an address of a cast? '
				3820	'This is dangerous: could be a temp var. '
				3821	'Take the address before doing the cast, rather than after'))
				3822
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	3823	# Create an extended_line, which is the concatenation of the current and
				3824	# next lines, for more effective checking of code that may span more than one
				3825	# line.
				3826	if linenum + 1 < clean_lines.NumLines():
				3827	extended_line = line + clean_lines.elided[linenum + 1]
				3828	else:
				3829	extended_line = line
				3830
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3831	# Check for people declaring static/global STL strings at the top level.
				3832	# This is dangerous because the C++ language does not guarantee that
				3833	# globals with constructors are initialized before the first access.
				3834	match = Match(
				3835	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				3836	line)
				3837	# Make sure it's not a function.
				3838	# Function template specialization looks like: "string foo<Type>(...".
				3839	# Class template definitions look like: "string Foo<Type>::Method(...".
erg@google.com	2aa5998	2013-10-28 19:09:25 +0000	[diff] [blame]	3840	#
				3841	# Also ignore things that look like operators. These are matched separately
				3842	# because operator names cross non-word boundaries. If we change the pattern
				3843	# above, we would decrease the accuracy of matching identifiers.
				3844	if (match and
				3845	not Search(r'\boperator\W', line) and
				3846	not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)', match.group(3))):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3847	error(filename, linenum, 'runtime/string', 4,
				3848	'For a static/global string constant, use a C style string instead: '
				3849	'"%schar %s[]".' %
				3850	(match.group(1), match.group(2)))
				3851
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3852	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				3853	error(filename, linenum, 'runtime/init', 4,
				3854	'You seem to be initializing a member variable with itself.')
				3855
				3856	if file_extension == 'h':
				3857	# TODO(unknown): check that 1-arg constructors are explicit.
				3858	# How to tell it's a constructor?
				3859	# (handled in CheckForNonStandardConstructs for now)
				3860	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				3861	# (level 1 error)
				3862	pass
				3863
				3864	# Check if people are using the verboten C basic types. The only exception
				3865	# we regularly allow is "unsigned short port" for port.
				3866	if Search(r'\bshort port\b', line):
				3867	if not Search(r'\bunsigned short port\b', line):
				3868	error(filename, linenum, 'runtime/int', 4,
				3869	'Use "unsigned short" for ports, not "short"')
				3870	else:
				3871	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				3872	if match:
				3873	error(filename, linenum, 'runtime/int', 4,
				3874	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				3875
				3876	# When snprintf is used, the second argument shouldn't be a literal.
				3877	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	3878	if match and match.group(2) != '0':
				3879	# If 2nd arg is zero, snprintf is used to calculate size.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3880	error(filename, linenum, 'runtime/printf', 3,
				3881	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				3882	'to snprintf.' % (match.group(1), match.group(2)))
				3883
				3884	# Check if some verboten C functions are being used.
				3885	if Search(r'\bsprintf\b', line):
				3886	error(filename, linenum, 'runtime/printf', 5,
				3887	'Never use sprintf. Use snprintf instead.')
				3888	match = Search(r'\b(strcpy\|strcat)\b', line)
				3889	if match:
				3890	error(filename, linenum, 'runtime/printf', 4,
				3891	'Almost always, snprintf is better than %s' % match.group(1))
				3892
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	3893	# Check if some verboten operator overloading is going on
				3894	# TODO(unknown): catch out-of-line unary operator&:
				3895	# class X {};
				3896	# int operator&(const X& x) { return 42; } // unary operator&
				3897	# The trick is it's hard to tell apart from binary operator&:
				3898	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				3899	if Search(r'\boperator\s&\s$\s*$', line):
				3900	error(filename, linenum, 'runtime/operator', 4,
				3901	'Unary operator& is dangerous. Do not use it.')
				3902
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3903	# Check for suspicious usage of "if" like
				3904	# } if (a == b) {
				3905	if Search(r'\}\sif\s\(', line):
				3906	error(filename, linenum, 'readability/braces', 4,
				3907	'Did you mean "else if"? If not, start a new line for "if".')
				3908
				3909	# Check for potential format string bugs like printf(foo).
				3910	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				3911	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3912	# TODO(sugawarayu): Catch the following case. Need to change the calling
				3913	# convention of the whole function to process multiple line to handle it.
				3914	# printf(
				3915	# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
				3916	printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
				3917	if printf_args:
				3918	match = Match(r'([\w.\->()]+)$', printf_args)
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	3919	if match and match.group(1) != '__VA_ARGS__':
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3920	function_name = re.search(r'\b((?:string)?printf)\s*\(',
				3921	line, re.I).group(1)
				3922	error(filename, linenum, 'runtime/printf', 4,
				3923	'Potential format string bug. Do %s("%%s", %s) instead.'
				3924	% (function_name, match.group(1)))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3925
				3926	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				3927	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				3928	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				3929	error(filename, linenum, 'runtime/memset', 4,
				3930	'Did you mean "memset(%s, 0, %s)"?'
				3931	% (match.group(1), match.group(2)))
				3932
				3933	if Search(r'\busing namespace\b', line):
				3934	error(filename, linenum, 'build/namespaces', 5,
				3935	'Do not use namespace using-directives. '
				3936	'Use using-declarations instead.')
				3937
				3938	# Detect variable-length arrays.
				3939	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				3940	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				3941	match.group(3).find(']') == -1):
				3942	# Split the size using space and arithmetic operators as delimiters.
				3943	# If any of the resulting tokens are not compile time constants then
				3944	# report the error.
				3945	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				3946	is_const = True
				3947	skip_next = False
				3948	for tok in tokens:
				3949	if skip_next:
				3950	skip_next = False
				3951	continue
				3952
				3953	if Search(r'sizeof$.+$', tok): continue
				3954	if Search(r'arraysize$\w+$', tok): continue
				3955
				3956	tok = tok.lstrip('(')
				3957	tok = tok.rstrip(')')
				3958	if not tok: continue
				3959	if Match(r'\d+', tok): continue
				3960	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				3961	if Match(r'k[A-Z0-9]\w*', tok): continue
				3962	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				3963	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				3964	# A catch all for tricky sizeof cases, including 'sizeof expression',
				3965	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3966	# requires skipping the next token because we split on ' ' and '*'.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3967	if tok.startswith('sizeof'):
				3968	skip_next = True
				3969	continue
				3970	is_const = False
				3971	break
				3972	if not is_const:
				3973	error(filename, linenum, 'runtime/arrays', 1,
				3974	'Do not use variable-length arrays. Use an appropriately named '
				3975	"('k' followed by CamelCase) compile-time constant for the size.")
				3976
				3977	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				3978	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				3979	# in the class declaration.
				3980	match = Match(
				3981	(r'\s*'
				3982	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				3983	r'$.*$;$'),
				3984	line)
				3985	if match and linenum + 1 < clean_lines.NumLines():
				3986	next_line = clean_lines.elided[linenum + 1]
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	3987	# We allow some, but not all, declarations of variables to be present
				3988	# in the statement that defines the class. The [\w\,\s] fragment of
				3989	# the regular expression below allows users to declare instances of
				3990	# the class or pointers to instances, but not less common types such
				3991	# as function pointers or arrays. It's a tradeoff between allowing
				3992	# reasonable code and avoiding trying to parse more C++ using regexps.
				3993	if not Search(r'^\s}[\w\,\s]*;', next_line):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	3994	error(filename, linenum, 'readability/constructors', 3,
				3995	match.group(1) + ' should be the last thing in the class')
				3996
				3997	# Check for use of unnamed namespaces in header files. Registration
				3998	# macros are typically OK, so we allow use of "namespace {" on lines
				3999	# that end with backslashes.
				4000	if (file_extension == 'h'
				4001	and Search(r'\bnamespace\s*{', line)
				4002	and line[-1] != '\\'):
				4003	error(filename, linenum, 'build/namespaces', 4,
				4004	'Do not use unnamed namespaces in header files. See '
				4005	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				4006	' for more information.')
				4007
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive. If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''
          for i in xrange(startline, linenum + 1):
            line += clean_lines.elided[i].strip()

  # Check for non-const references in function parameters. A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
        isinstance(nesting_state.stack[-1], _NamespaceInfo)):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], _ClassInfo) or
        isinstance(nesting_state.stack[-2], _NamespaceInfo)):
      check_params = True  # just opened global/class/namespace block
  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>". Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line. Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list. Try a bit harder to catch this case
    # by looking at up to two preceding lines.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4119
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4120
				4121	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				4122	error):
				4123	"""Checks for a C-style cast by looking for the pattern.
				4124
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4125	Args:
				4126	filename: The name of the current file.
				4127	linenum: The number of the line to check.
				4128	line: The line of code to check.
				4129	raw_line: The raw line of code to check, with comments.
				4130	cast_type: The string for the C++ cast to recommend. This is either
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4131	reinterpret_cast, static_cast, or const_cast, depending.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4132	pattern: The regular expression used to find C-style casts.
				4133	error: The function to call with any errors found.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4134
				4135	Returns:
				4136	True if an error was emitted.
				4137	False otherwise.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4138	"""
				4139	match = Search(pattern, line)
				4140	if not match:
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4141	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4142
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4143	# Exclude lines with sizeof, since sizeof looks like a cast.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4144	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				4145	if sizeof_match:
erg@google.com	fd5da63	2013-10-25 17:39:45 +0000	[diff] [blame]	4146	return False
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4147
erg@google.com	d350fe5	2013-01-14 17:51:48 +0000	[diff] [blame]	4148	# operator++(int) and operator--(int)
				4149	if (line[0:match.start(1) - 1].endswith(' operator++') or
				4150	line[0:match.start(1) - 1].endswith(' operator--')):
				4151	return False
				4152
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4153	# A single unnamed argument for a function tends to look like old
				4154	# style cast. If we see those, don't issue warnings for deprecated
				4155	# casts, instead issue warnings for unnamed arguments where
				4156	# appropriate.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4157	#
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4158	# These are things that we want warnings for, since the style guide
				4159	# explicitly require all parameters to be named:
				4160	# Function(int);
				4161	# Function(int) {
				4162	# ConstMember(int) const;
				4163	# ConstMember(int) const {
				4164	# ExceptionMember(int) throw (...);
				4165	# ExceptionMember(int) throw (...) {
				4166	# PureVirtual(int) = 0;
				4167	#
				4168	# These are functions of some sort, where the compiler would be fine
				4169	# if they had named parameters, but people often omit those
				4170	# identifiers to reduce clutter:
				4171	# (FunctionPointer)(int);
				4172	# (FunctionPointer)(int) = value;
				4173	# Function((function_pointer_arg)(int))
				4174	# <TemplateArgument(int)>;
				4175	# <(FunctionPointerTemplateArgument)(int)>;
				4176	remainder = line[match.end(0):]
				4177	if Match(r'^\s*(?:;\|const\b\|throw\b\|=\|>\|\{\|\))', remainder):
				4178	# Looks like an unnamed parameter.
				4179
				4180	# Don't warn on any kind of template arguments.
				4181	if Match(r'^\s*>', remainder):
				4182	return False
				4183
				4184	# Don't warn on assignments to function pointers, but keep warnings for
				4185	# unnamed parameters to pure virtual functions. Note that this pattern
				4186	# will also pass on assignments of "0" to function pointers, but the
				4187	# preferred values for those would be "nullptr" or "NULL".
				4188	matched_zero = Match(r'^\s=\s(\S+)\s;', remainder)
				4189	if matched_zero and matched_zero.group(1) != '0':
				4190	return False
				4191
				4192	# Don't warn on function pointer declarations. For this we need
				4193	# to check what came before the "(type)" string.
				4194	if Match(r'.\)\s$', line[0:match.start(0)]):
				4195	return False
				4196
				4197	# Don't warn if the parameter is named with block comments, e.g.:
				4198	# Function(int /unused_param/);
				4199	if '/*' in raw_line:
				4200	return False
				4201
				4202	# Passed all filters, issue warning here.
				4203	error(filename, linenum, 'readability/function', 3,
				4204	'All parameters should be named in a function')
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4205	return True
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4206
				4207	# At this point, all that should be left is actual casts.
				4208	error(filename, linenum, 'readability/casting', 4,
				4209	'Using C-style cast. Use %s<%s>(...) instead' %
				4210	(cast_type, match.group(1)))
				4211
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4212	return True
				4213
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4214
				4215	_HEADERS_CONTAINING_TEMPLATES = (
				4216	('<deque>', ('deque',)),
				4217	('<functional>', ('unary_function', 'binary_function',
				4218	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				4219	'negate',
				4220	'equal_to', 'not_equal_to', 'greater', 'less',
				4221	'greater_equal', 'less_equal',
				4222	'logical_and', 'logical_or', 'logical_not',
				4223	'unary_negate', 'not1', 'binary_negate', 'not2',
				4224	'bind1st', 'bind2nd',
				4225	'pointer_to_unary_function',
				4226	'pointer_to_binary_function',
				4227	'ptr_fun',
				4228	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				4229	'mem_fun_ref_t',
				4230	'const_mem_fun_t', 'const_mem_fun1_t',
				4231	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				4232	'mem_fun_ref',
				4233	)),
				4234	('<limits>', ('numeric_limits',)),
				4235	('<list>', ('list',)),
				4236	('<map>', ('map', 'multimap',)),
				4237	('<memory>', ('allocator',)),
				4238	('<queue>', ('queue', 'priority_queue',)),
				4239	('<set>', ('set', 'multiset',)),
				4240	('<stack>', ('stack',)),
				4241	('<string>', ('char_traits', 'basic_string',)),
				4242	('<utility>', ('pair',)),
				4243	('<vector>', ('vector',)),
				4244
				4245	# gcc extensions.
				4246	# Note: std::hash is their hash, ::hash is our hash
				4247	('<hash_map>', ('hash_map', 'hash_multimap',)),
				4248	('<hash_set>', ('hash_set', 'hash_multiset',)),
				4249	('<slist>', ('slist',)),
				4250	)
				4251
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4252	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				4253
				4254	_re_pattern_algorithm_header = []
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4255	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				4256	'transform'):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4257	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				4258	# type::max().
				4259	_re_pattern_algorithm_header.append(
				4260	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				4261	_template,
				4262	'<algorithm>'))
				4263
				4264	_re_pattern_templates = []
				4265	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				4266	for _template in _templates:
				4267	_re_pattern_templates.append(
				4268	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				4269	_template + '<>',
				4270	_header))
				4271
				4272
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a header are part of one module.

  Two files are treated as the same 'module' when, after normalization,
  the source path ends with the header path. Normalization strips the
  '.cc' / '.h' extension, a trailing '_unittest' or '_test' on the source,
  a trailing '-inl' on the header, and collapses '/public/' and
  '/internal/' path components to '/'. Hence foo.h, foo-inl.h, foo.cc,
  foo_test.cc and foo_unittest.cc in one directory all belong together, as
  do some/path/public/xyzzy and some/path/internal/xyzzy.

  When filename_cc carries a longer path than filename_h (for example
  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h'), the leading
  part of the source path that the header lacks is returned as well, so the
  caller can prepend it to open the header. The real include paths are not
  available here, so this is guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h are reported as the same
  module, so some false positives are possible. This should be rare.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  source_ext = '.cc'
  header_ext = '.h'

  if not filename_cc.endswith(source_ext):
    return (False, '')
  source_stem = filename_cc[:-len(source_ext)]
  # Only one test suffix is ever removed; '_unittest' is tried first.
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      source_stem = source_stem[:-len(test_suffix)]
      break
  source_stem = source_stem.replace('/public/', '/')
  source_stem = source_stem.replace('/internal/', '/')

  if not filename_h.endswith(header_ext):
    return (False, '')
  header_stem = filename_h[:-len(header_ext)]
  inline_suffix = '-inl'
  if header_stem.endswith(inline_suffix):
    header_stem = header_stem[:-len(inline_suffix)]
  header_stem = header_stem.replace('/public/', '/')
  header_stem = header_stem.replace('/internal/', '/')

  if not source_stem.endswith(header_stem):
    return False, ''
  # What is left of the source path once the header path is cut off its end.
  return True, source_stem[:-len(header_stem)]
				4326
				4327
				4328	def UpdateIncludeState(filename, include_state, io=codecs):
				4329	"""Fill up the include_state with new includes found from the file.
				4330
				4331	Args:
				4332	filename: the name of the header to read.
				4333	include_state: an _IncludeState instance in which the headers are inserted.
				4334	io: The io factory to use to read the file. Provided for testability.
				4335
				4336	Returns:
				4337	True if a header was succesfully added. False otherwise.
				4338	"""
				4339	headerfile = None
				4340	try:
				4341	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				4342	except IOError:
				4343	return False
				4344	linenum = 0
				4345	for line in headerfile:
				4346	linenum += 1
				4347	clean_line = CleanseComments(line)
				4348	match = _RE_PATTERN_INCLUDE.search(clean_line)
				4349	if match:
				4350	include = match.group(2)
				4351	# The value formatting is cute, but not really used right now.
				4352	# What matters here is that the key is in include_state.
				4353	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				4354	return True
				4355
				4356
				4357	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				4358	io=codecs):
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4359	"""Reports for missing stl includes.
				4360
				4361	This function will output warnings to make sure you are including the headers
				4362	necessary for the stl containers and functions that you use. We only give one
				4363	reason to include a header. For example, if you use both equal_to<> and
				4364	less<> in a .h file, only one (the latter in the file) of these will be
				4365	reported as a reason to include the <functional>.
				4366
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4367	Args:
				4368	filename: The name of the current file.
				4369	clean_lines: A CleansedLines instance containing the file.
				4370	include_state: An _IncludeState instance.
				4371	error: The function to call with any errors found.
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4372	io: The IO factory to use to read the header file. Provided for unittest
				4373	injection.
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4374	"""
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4375	required = {} # A map of header name to linenumber and the template entity.
				4376	# Example of required: { '<functional>': (1219, 'less<>') }
				4377
				4378	for linenum in xrange(clean_lines.NumLines()):
				4379	line = clean_lines.elided[linenum]
				4380	if not line or line[0] == '#':
				4381	continue
				4382
				4383	# String is special -- it is a non-templatized type in STL.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4384	matched = _RE_PATTERN_STRING.search(line)
				4385	if matched:
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4386	# Don't warn about strings in non-STL namespaces:
				4387	# (We check only the first match per line; good enough.)
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4388	prefix = line[:matched.start()]
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4389	if prefix.endswith('std::') or not prefix.endswith('::'):
				4390	required['<string>'] = (linenum, 'string')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4391
				4392	for pattern, template, header in _re_pattern_algorithm_header:
				4393	if pattern.search(line):
				4394	required[header] = (linenum, template)
				4395
				4396	# The following function is just a speed up, no semantics are changed.
				4397	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				4398	continue
				4399
				4400	for pattern, template, header in _re_pattern_templates:
				4401	if pattern.search(line):
				4402	required[header] = (linenum, template)
				4403
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4404	# The policy is that if you #include something in foo.h you don't need to
				4405	# include it again in foo.cc. Here, we will look at possible includes.
				4406	# Let's copy the include_state so it is only messed up within this function.
				4407	include_state = include_state.copy()
				4408
				4409	# Did we find the header for this file (if any) and succesfully load it?
				4410	header_found = False
				4411
				4412	# Use the absolute path so that matching works properly.
erg@google.com	90ecb62	2012-01-30 19:34:23 +0000	[diff] [blame]	4413	abs_filename = FileInfo(filename).FullName()
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4414
				4415	# For Emacs's flymake.
				4416	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				4417	# by flymake and that file name might end with '_flymake.cc'. In that case,
				4418	# restore original file name here so that the corresponding header file can be
				4419	# found.
				4420	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				4421	# instead of 'foo_flymake.h'
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4422	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4423
				4424	# include_state is modified during iteration, so we iterate over a copy of
				4425	# the keys.
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4426	header_keys = include_state.keys()
				4427	for header in header_keys:
erg@google.com	e35f765	2009-06-19 20:52:09 +0000	[diff] [blame]	4428	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				4429	fullpath = common_path + header
				4430	if same_module and UpdateIncludeState(fullpath, include_state, io):
				4431	header_found = True
				4432
				4433	# If we can't find the header file for a .cc, assume it's because we don't
				4434	# know where to look. In that case we'll give up as we're not sure they
				4435	# didn't include it in the .h file.
				4436	# TODO(unknown): Do a better job of finding .h files so we are confident that
				4437	# not having the .h file means there isn't one.
				4438	if filename.endswith('.cc') and not header_found:
				4439	return
				4440
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4441	# All the lines have been processed, report the errors found.
				4442	for required_header_unstripped in required:
				4443	template = required[required_header_unstripped][1]
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4444	if required_header_unstripped.strip('<>"') not in include_state:
				4445	error(filename, required[required_header_unstripped][0],
				4446	'build/include_what_you_use', 4,
				4447	'Add #include ' + required_header_unstripped + ' for ' + template)
				4448
				4449
# Matches a make_pair call whose template arguments are written out.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Reports make_pair calls that spell out their template arguments.

  G++ 4.6 in C++0x mode fails badly when make_pair is given explicit
  template arguments, and that usage was never intended anyway.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(elided_line) is None:
    return

  message = ('For C++11-compatibility, omit template arguments from make_pair'
             ' OR use pair directly OR if appropriate, construct a pair directly')
  confidence = 4  # 4 = high confidence
  error(filename, linenum, 'build/explicit_make_pair', confidence, message)
erg@google.com	8a95ecc	2011-09-08 00:45:54 +0000	[diff] [blame]	4472
				4473
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance holding the file; its raw_lines
                 attribute is read here for NOLINT parsing.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  # The innermost block is flagged as something other than _NO_ASM
  # (presumably inline assembly), so none of the C++ checks below apply.
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # A None default avoids sharing one mutable list object across calls.
  if extra_check_functions:
    for check_fn in extra_check_functions:
      check_fn(filename, clean_lines, line, error)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4514
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # Normalise here so ProcessLine always receives an iterable, and so no
  # mutable default object is shared between calls.
  if extra_check_functions is None:
    extra_check_functions = []

  # Builds a new list; the caller's list object is not extended.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  # Run once, after every line has been fed through the nesting state.
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
				4559
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and hands the lines to ProcessFileData. Files that cannot be
  opened or whose extension is not recognised are skipped with a note on
  stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
            >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # Normalise here so ProcessFileData always receives an iterable, and so no
  # mutable default object is shared between calls.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # The context manager closes the handle even if read() fails.
      with codecs.open(filename, 'r', 'utf8', 'replace') as source_file:
        lines = source_file.read().split('\n')
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Remove trailing '\r', remembering whether any was seen.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  valid_extensions = ['cc', 'h', 'cpp', 'cu', 'cuh']
  if filename != '-' and file_extension not in valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(.cc, .h, .cpp, .cu, .cuh)\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
				4627
				4628
				4629	def PrintUsage(message):
				4630	"""Prints a brief usage string and exits, optionally with an error message.
				4631
				4632	Args:
				4633	message: The optional error message.
				4634	"""
				4635	sys.stderr.write(_USAGE)
				4636	if message:
				4637	sys.exit('\nFATAL ERROR: ' + message)
				4638	else:
				4639	sys.exit(1)
				4640
				4641
				4642	def PrintCategories():
				4643	"""Prints a list of all the error-categories used by error messages.
				4644
				4645	These are the categories used to filter messages via --filter.
				4646	"""
erg+personal@google.com	0518964	2010-04-30 20:43:03 +0000	[diff] [blame]	4647	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4648	sys.exit(0)
				4649
				4650
				4651	def ParseArguments(args):
				4652	"""Parses the command line arguments.
				4653
				4654	This may set the output format and verbosity level as side-effects.
				4655
				4656	Args:
				4657	args: The command line arguments:
				4658
				4659	Returns:
				4660	The list of filenames to lint.
				4661	"""
				4662	try:
				4663	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4664	'counting=',
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4665	'filter=',
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame^]	4666	'root=',
				4667	'linelength='])
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4668	except getopt.GetoptError:
				4669	PrintUsage('Invalid arguments.')
				4670
				4671	verbosity = _VerboseLevel()
				4672	output_format = _OutputFormat()
				4673	filters = ''
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4674	counting_style = ''
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4675
				4676	for (opt, val) in opts:
				4677	if opt == '--help':
				4678	PrintUsage(None)
				4679	elif opt == '--output':
erg@google.com	c667123	2013-10-25 21:44:03 +0000	[diff] [blame]	4680	if val not in ('emacs', 'vs7', 'eclipse'):
erg@google.com	02c27fd	2013-05-28 21:34:34 +0000	[diff] [blame]	4681	PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4682	output_format = val
				4683	elif opt == '--verbose':
				4684	verbosity = int(val)
				4685	elif opt == '--filter':
				4686	filters = val
erg@google.com	a87abb8	2009-02-24 01:41:01 +0000	[diff] [blame]	4687	if not filters:
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4688	PrintCategories()
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4689	elif opt == '--counting':
				4690	if val not in ('total', 'toplevel', 'detailed'):
				4691	PrintUsage('Valid counting options are total, toplevel, and detailed')
				4692	counting_style = val
erg@google.com	4d70a88	2013-04-16 21:06:32 +0000	[diff] [blame]	4693	elif opt == '--root':
				4694	global _root
				4695	_root = val
erg@google.com	ab53edf	2013-11-05 22:23:37 +0000	[diff] [blame^]	4696	elif opt == '--linelength':
				4697	global _line_length
				4698	try:
				4699	_line_length = int(val)
				4700	except ValueError:
				4701	PrintUsage('Line length must be digits.')
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4702
				4703	if not filenames:
				4704	PrintUsage('No files were specified.')
				4705
				4706	_SetOutputFormat(output_format)
				4707	_SetVerboseLevel(verbosity)
				4708	_SetFilters(filters)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4709	_SetCountingStyle(counting_style)
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4710
				4711	return filenames
				4712
				4713
				4714	def main():
				4715	filenames = ParseArguments(sys.argv[1:])
				4716
				4717	# Change stderr to write with replacement characters so we don't die
				4718	# if we try to print something containing non-ASCII characters.
				4719	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				4720	codecs.getreader('utf8'),
				4721	codecs.getwriter('utf8'),
				4722	'replace')
				4723
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4724	_cpplint_state.ResetErrorCounts()
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4725	for filename in filenames:
				4726	ProcessFile(filename, _cpplint_state.verbose_level)
erg@google.com	a868d2d	2009-10-09 21:18:45 +0000	[diff] [blame]	4727	_cpplint_state.PrintErrorCounts()
				4728
erg@google.com	4e00b9a	2009-01-12 23:05:11 +0000	[diff] [blame]	4729	sys.exit(_cpplint_state.error_count > 0)
				4730
				4731
				4732	if __name__ == '__main__':
				4733	main()