Blame - hooks.py - tools/repo

blob: 1abba0c496a68cc7729c40c254a832323bdc27aa [file] [log] [blame]

Remy Bohmer	16c1328	2020-09-10 10:38:04 +0200	[diff] [blame]	1	# -*- coding:utf-8 -*-
				2	#
				3	# Copyright (C) 2008 The Android Open Source Project
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
Remy Bohmer	7f7acfe	2020-08-01 18:36:44 +0200	[diff] [blame^]	17	import errno
Remy Bohmer	16c1328	2020-09-10 10:38:04 +0200	[diff] [blame]	18	import json
				19	import os
				20	import re
Remy Bohmer	7f7acfe	2020-08-01 18:36:44 +0200	[diff] [blame^]	21	import subprocess
Remy Bohmer	16c1328	2020-09-10 10:38:04 +0200	[diff] [blame]	22	import sys
				23	import traceback
				24
				25	from error import HookError
				26	from git_refs import HEAD
				27
				28	from pyversion import is_python3
				29	if is_python3():
				30	import urllib.parse
				31	else:
				32	import imp
				33	import urlparse
				34	urllib = imp.new_module('urllib')
				35	urllib.parse = urlparse
				36	input = raw_input # noqa: F821
				37
Remy Bohmer	7f7acfe	2020-08-01 18:36:44 +0200	[diff] [blame^]	38
class RepoHook(object):
    """A RepoHook contains information about a script to run as a hook.

    Hooks are used to run a python script before running an upload (for
    instance, to run presubmit checks).  Eventually, we may have hooks for
    other actions.

    This shouldn't be confused with files in the 'repo/hooks' directory.
    Those files are copied into each '.git/hooks' folder for each project.
    Repo-level hooks are associated instead with repo actions.

    Hooks are always python.  When a hook is run, we will load the hook into
    the interpreter and execute its main() function.

    Combinations of hook option flags:
    - no-verify=False, verify=False (DEFAULT):
      If stdout is a tty, can prompt about running hooks if needed.
      If user denies running hooks, the action is cancelled.  If stdout is
      not a tty and we would need to prompt about hooks, action is
      cancelled.
    - no-verify=False, verify=True:
      Always run hooks with no prompt.
    - no-verify=True, verify=False:
      Never run hooks, but run action anyway (AKA bypass hooks).
    - no-verify=True, verify=True:
      Invalid
    """

    def __init__(self,
                 hook_type,
                 hooks_project,
                 repo_topdir,
                 manifest_url,
                 bypass_hooks=False,
                 allow_all_hooks=False,
                 ignore_hooks=False,
                 abort_if_user_denies=False):
        """RepoHook constructor.

        Params:
          hook_type: A string representing the type of hook.  This is also
              used to figure out the name of the file containing the hook.
              For example: 'pre-upload'.
          hooks_project: The project containing the repo hooks.
              If you have a manifest, this is manifest.repo_hooks_project.
              OK if this is None, which will make the hook a no-op.
          repo_topdir: The top directory of the repo client checkout.
              This is the one containing the .repo directory.  Scripts will
              run with CWD as this directory.
              If you have a manifest, this is manifest.topdir.
          manifest_url: The URL to the manifest git repo.
          bypass_hooks: If True, then 'Do not run the hook'.
          allow_all_hooks: If True, then 'Run the hook without prompting'.
          ignore_hooks: If True, then 'Do not abort action if hooks fail'.
          abort_if_user_denies: If True, we'll abort running the hook if the
              user doesn't allow us to run the hook.
        """
        self._hook_type = hook_type
        self._hooks_project = hooks_project
        self._repo_topdir = repo_topdir
        self._manifest_url = manifest_url
        self._bypass_hooks = bypass_hooks
        self._allow_all_hooks = allow_all_hooks
        self._ignore_hooks = ignore_hooks
        self._abort_if_user_denies = abort_if_user_denies

        # Store the full path to the script for convenience.
        if self._hooks_project:
            self._script_fullpath = os.path.join(
                self._hooks_project.worktree, self._hook_type + '.py')
        else:
            self._script_fullpath = None

    def _GetHash(self):
        """Return a hash of the contents of the hooks directory.

        We'll just use git to do this.  This hash has the property that if
        anything changes in the directory we will return a different hash.

        SECURITY CONSIDERATION:
          This hash only represents the contents of files in the hook
          directory, not any other files imported or called by hooks.
          Changes to imported files can change the script behavior without
          affecting the hash.

        Returns:
          A string representing the hash.  This will always be ASCII so that
          it can be printed to the user easily.
        """
        assert self._hooks_project, "Must have hooks to calculate their hash."

        # We will use the work_git object rather than just calling
        # GetRevisionId().  That gives us a hash of the latest checked in
        # version of the files that the user will actually be executing.
        # Specifically, GetRevisionId() doesn't appear to change even if a
        # user checks out a different version of the hooks repo (via git
        # checkout) nor if a user commits their own revs.
        #
        # NOTE: Local (non-committed) changes will not be factored into this
        # hash.  I think this is OK, since we're really only worried about
        # warning the user about upstream changes.
        return self._hooks_project.work_git.rev_parse(HEAD)

    def _GetMustVerb(self):
        """Return 'must' if the hook is required; 'should' if not."""
        if self._abort_if_user_denies:
            return 'must'
        return 'should'

    def _CheckForHookApproval(self):
        """Check to see whether this hook has been approved.

        We'll accept approval of manifest URLs if they're using secure
        transports.  This way the user can say they trust the manifest
        hoster.  For insecure hosts, we fall back to checking the hash of
        the hooks repo.

        Note that we ask permission for each individual hook even though we
        use the hash of all hooks when detecting changes.  We'd like the
        user to be able to approve / deny each hook individually.  We only
        use the hash of all hooks because there is no other easy way to
        detect changes to local imports.

        Returns:
          True if this hook is approved to run; False otherwise.

        Raises:
          HookError: Raised if the user doesn't approve and
              abort_if_user_denies was passed to the constructor.
        """
        if self._ManifestUrlHasSecureScheme():
            return self._CheckForHookApprovalManifest()
        return self._CheckForHookApprovalHash()

    def _CheckForHookApprovalHelper(self, subkey, new_val, main_prompt,
                                    changed_prompt):
        """Check for approval for a particular attribute and hook.

        Args:
          subkey: The git config key under [repo.hooks.<hook_type>] to store
              the last approved string.
          new_val: The new value to compare against the last approved one.
          main_prompt: Message to display to the user to ask for approval.
          changed_prompt: Message explaining why we're re-asking for
              approval.

        Returns:
          True if this hook is approved to run; False otherwise.

        Raises:
          HookError: Raised if the user doesn't approve and
              abort_if_user_denies was passed to the constructor.
        """
        hooks_config = self._hooks_project.config
        git_approval_key = 'repo.hooks.%s.%s' % (self._hook_type, subkey)

        # Get the last value that the user approved for this hook; may be
        # None.
        old_val = hooks_config.GetString(git_approval_key)

        if old_val is not None:
            # User previously approved hook and asked not to be prompted
            # again.
            if new_val == old_val:
                # Approval matched.  We're done.
                return True
            # Give the user a reason why we're prompting, since they last
            # told us to "never ask again".
            prompt = 'WARNING: %s\n\n' % (changed_prompt,)
        else:
            prompt = ''

        # Prompt the user only if we're on a tty; when we are not on a tty
        # there is nobody to ask, so we'll assume "no".
        if sys.stdout.isatty():
            prompt += main_prompt + ' (yes/always/NO)? '
            response = input(prompt).lower()
            print()

            # User is doing a one-time approval.
            if response in ('y', 'yes'):
                return True
            elif response == 'always':
                # Remember the approved value so we don't ask again until
                # it changes.
                hooks_config.SetString(git_approval_key, new_val)
                return True

        # For anything else, we'll assume no approval.
        if self._abort_if_user_denies:
            raise HookError('You must allow the %s hook or use --no-verify.' %
                            self._hook_type)

        return False

    def _ManifestUrlHasSecureScheme(self):
        """Check if the URI for the manifest is a secure transport."""
        secure_schemes = ('file', 'https', 'ssh', 'persistent-https', 'sso',
                          'rpc')
        parse_results = urllib.parse.urlparse(self._manifest_url)
        return parse_results.scheme in secure_schemes

    def _CheckForHookApprovalManifest(self):
        """Check whether the user has approved this manifest host.

        Returns:
          True if this hook is approved to run; False otherwise.
        """
        return self._CheckForHookApprovalHelper(
            'approvedmanifest',
            self._manifest_url,
            'Run hook scripts from %s' % (self._manifest_url,),
            'Manifest URL has changed since %s was allowed.' %
            (self._hook_type,))

    def _CheckForHookApprovalHash(self):
        """Check whether the user has approved the hooks repo.

        Returns:
          True if this hook is approved to run; False otherwise.
        """
        prompt = ('Repo %s run the script:\n'
                  ' %s\n'
                  '\n'
                  'Do you want to allow this script to run')
        return self._CheckForHookApprovalHelper(
            'approvedhash',
            self._GetHash(),
            prompt % (self._GetMustVerb(), self._script_fullpath),
            'Scripts have changed since %s was allowed.' % (self._hook_type,))

    @staticmethod
    def _ExtractInterpFromShebang(data):
        """Extract the interpreter used in the shebang.

        Try to locate the interpreter the script is using (ignoring `env`).

        Args:
          data: The file content of the script.

        Returns:
          The main script interpreter as written in the shebang (the program
          following `env` when `env` is used), or None if a shebang is not
          used or could not be parsed out.
        """
        firstline = data.splitlines()[:1]
        if not firstline:
            return None

        # The format here can be tricky.
        shebang = firstline[0].strip()
        m = re.match(r'^#!\s*([^\s]+)(?:\s+([^\s]+))?', shebang)
        if not m:
            return None

        # If the script is using `env`, find the target program.  This is
        # None when `env` is given no program to run.
        interp = m.group(1)
        if os.path.basename(interp) == 'env':
            interp = m.group(2)

        return interp

    def _ExecuteHookViaReexec(self, interp, context, **kwargs):
        """Execute the hook script through |interp|.

        Note: Support for this feature should be dropped ~Jun 2021.

        Args:
          interp: The Python program to run.
          context: Basic Python context to execute the hook inside.
          kwargs: Arbitrary arguments to pass to the hook script.

        Raises:
          HookError: When the hooks failed for any reason.
          OSError: When |interp| cannot be started (propagated from
              subprocess.Popen).
        """
        # This logic needs to be kept in sync with _ExecuteHookViaImport
        # below.
        #
        # The path and the JSON payloads are embedded with %r so that they
        # arrive in the child as properly escaped string literals.  Pasting
        # them raw between quotes would let backslashes (Windows paths, JSON
        # escapes such as \" or \n) or quote characters corrupt the script.
        script = """
import json, os, sys
path = %(path)r
kwargs = json.loads(%(kwargs)r)
context = json.loads(%(context)r)
sys.path.insert(0, os.path.dirname(path))
data = open(path).read()
exec(compile(data, path, 'exec'), context)
context['main'](**kwargs)
""" % {
            'path': self._script_fullpath,
            'kwargs': json.dumps(kwargs),
            'context': json.dumps(context),
        }

        # We pass the script via stdin to avoid OS argv limits.  It also
        # makes unhandled exception tracebacks less verbose/confusing for
        # users.
        cmd = [interp, '-c', 'import sys; exec(sys.stdin.read())']
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        proc.communicate(input=script.encode('utf-8'))
        if proc.returncode:
            raise HookError('Failed to run %s hook.' % (self._hook_type,))

    def _ExecuteHookViaImport(self, data, context, **kwargs):
        """Execute the hook code in |data| directly.

        Args:
          data: The code of the hook to execute.
          context: Basic Python context to execute the hook inside.
          kwargs: Arbitrary arguments to pass to the hook script.

        Raises:
          HookError: When the hooks failed for any reason.
        """
        # Exec, storing global context in the context dict.  We catch
        # exceptions and convert to a HookError w/ just the failing
        # traceback.
        try:
            exec(compile(data, self._script_fullpath, 'exec'), context)
        except Exception:
            raise HookError(
                '%s\nFailed to import %s hook; see traceback above.' %
                (traceback.format_exc(), self._hook_type))

        # Running the script should have defined a main() function.
        if 'main' not in context:
            raise HookError('Missing main() in: "%s"' % self._script_fullpath)

        # Call the main function in the hook.  If the hook should cause the
        # build to fail, it will raise an Exception.  We'll catch that and
        # convert to a HookError w/ just the failing traceback.
        try:
            context['main'](**kwargs)
        except Exception:
            raise HookError(
                '%s\nFailed to run main() for %s hook; see traceback '
                'above.' % (traceback.format_exc(), self._hook_type))

    def _ExecuteHook(self, **kwargs):
        """Actually execute the given hook.

        This will run the hook's 'main' function, either in our python
        interpreter or, when the hook's shebang asks for a different major
        version of python, through that interpreter.

        Args:
          kwargs: Keyword arguments to pass to the hook.  These are often
              specific to the hook type.  For instance, pre-upload hooks
              will contain a project_list.

        Raises:
          HookError: When the hook failed for any reason.
        """
        # Keep sys.path and CWD stashed away so that we can always restore
        # them upon function exit.
        orig_path = os.getcwd()
        orig_syspath = sys.path

        try:
            # Always run hooks with CWD as topdir.
            os.chdir(self._repo_topdir)

            # Put the hook dir as the first item of sys.path so hooks can do
            # relative imports.  We want to replace the repo dir as [0] so
            # hooks can't import repo files.
            sys.path = [os.path.dirname(self._script_fullpath)] + sys.path[1:]

            # Initial global context for the hook to run within.
            context = {'__file__': self._script_fullpath}

            # Add 'hook_should_take_kwargs' to the arguments to be passed to
            # main.  We don't actually want hooks to define their main with
            # this argument--it's there to remind them that their hook
            # should always take **kwargs.
            # For instance, a pre-upload hook should be defined like:
            #   def main(project_list, **kwargs):
            #
            # This allows us to later expand the API without breaking old
            # hooks.
            kwargs = kwargs.copy()
            kwargs['hook_should_take_kwargs'] = True

            # See what version of python the hook has been written against.
            # Use a context manager so the file handle is closed promptly
            # rather than whenever the garbage collector gets to it.
            with open(self._script_fullpath) as script_file:
                data = script_file.read()
            interp = self._ExtractInterpFromShebang(data)
            reexec = False
            if interp:
                prog = os.path.basename(interp)
                if prog.startswith('python2') and sys.version_info.major != 2:
                    reexec = True
                elif (prog.startswith('python3') and
                      sys.version_info.major == 2):
                    reexec = True

            # Attempt to execute the hooks through the requested version of
            # Python.
            if reexec:
                try:
                    self._ExecuteHookViaReexec(interp, context, **kwargs)
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        # We couldn't find the interpreter, so fallback to
                        # importing.
                        reexec = False
                    else:
                        raise

            # Run the hook by importing directly.
            if not reexec:
                self._ExecuteHookViaImport(data, context, **kwargs)
        finally:
            # Restore sys.path and CWD.
            sys.path = orig_syspath
            os.chdir(orig_path)

    def _CheckHook(self):
        """Verify that the hook script exists on disk.

        Raises:
          HookError: When the hook script is not a regular file.
        """
        # Bail with a nice error if we can't find the hook.
        if not os.path.isfile(self._script_fullpath):
            raise HookError('Couldn\'t find repo hook: %s' %
                            self._script_fullpath)

    def Run(self, **kwargs):
        """Run the hook.

        If the hook doesn't exist (because there is no hooks project or
        because this particular hook is not enabled), this is a no-op.

        Args:
          kwargs: Keyword arguments to pass to the hook.  These are often
              specific to the hook type.  For instance, pre-upload hooks
              will contain a project_list.

        Returns:
          True: On success or ignore hooks by user-request
          False: The hook failed.  The caller should respond with aborting
            the action.
            Some examples in which False is returned:
            * Finding the hook failed while it was enabled, or
            * the user declined to run a required hook (from
              _CheckForHookApproval)
            In all these cases the user did not pass the proper arguments to
            ignore the result through the option combinations as listed in
            AddOptionGroup().
        """
        # Do not do anything in case bypass_hooks is set, or
        # no-op if there is no hooks project or if hook is disabled.
        if (self._bypass_hooks or
                not self._hooks_project or
                self._hook_type not in
                self._hooks_project.enabled_repo_hooks):
            return True

        passed = True
        try:
            self._CheckHook()

            # Make sure the user is OK with running the hook.
            if self._allow_all_hooks or self._CheckForHookApproval():
                # Run the hook with the same version of python we're using.
                self._ExecuteHook(**kwargs)
        except SystemExit as e:
            # The hook called sys.exit(); report it instead of letting it
            # terminate repo itself.
            passed = False
            print('ERROR: %s hooks exited with exit code: %s' %
                  (self._hook_type, str(e)),
                  file=sys.stderr)
        except HookError as e:
            passed = False
            print('ERROR: %s' % str(e), file=sys.stderr)

        if not passed and self._ignore_hooks:
            print('\nWARNING: %s hooks failed, but continuing anyways.' %
                  self._hook_type,
                  file=sys.stderr)
            passed = True

        return passed

    @classmethod
    def FromSubcmd(cls, manifest, opt, *args, **kwargs):
        """Method to construct the repo hook class

        Args:
          manifest: The current active manifest for this command from which
              we extract a couple of fields.
          opt: Contains the commandline options for the action of this hook.
              It should contain the options added by AddOptionGroup() in
              which we are interested in RepoHook execution.
          args: Positional arguments forwarded to the constructor (normally
              just the hook type, e.g. 'pre-upload').
          kwargs: Keyword arguments forwarded to the constructor.  Values
              given here for bypass_hooks / allow_all_hooks / ignore_hooks
              take precedence over the ones found in |opt|.

        Returns:
          A new instance of |cls|.
        """
        # Only fall back to the command line options for flags the caller
        # did not set explicitly.
        for key in ('bypass_hooks', 'allow_all_hooks', 'ignore_hooks'):
            kwargs.setdefault(key, getattr(opt, key))
        kwargs.update({
            'hooks_project': manifest.repo_hooks_project,
            'repo_topdir': manifest.topdir,
            'manifest_url': manifest.manifestProject.GetRemote('origin').url,
        })
        return cls(*args, **kwargs)

    @staticmethod
    def AddOptionGroup(parser, name):
        """Help options relating to the various hooks.

        Args:
          parser: The option parser to add the hook option group to.
          name: The name of the hook (e.g. 'pre-upload'); used in the group
              title and in the help texts.
        """

        # Note that verify and no-verify are NOT opposites of each other,
        # which is why they store to different locations.  We are using them
        # to match 'git commit' syntax.
        group = parser.add_option_group(name + ' hooks')
        group.add_option('--no-verify',
                         dest='bypass_hooks', action='store_true',
                         help='Do not run the %s hook.' % name)
        group.add_option('--verify',
                         dest='allow_all_hooks', action='store_true',
                         help='Run the %s hook without prompting.' % name)
        # The dest matches what the parser would derive from the option
        # name; it is spelled out to mirror the options above and because
        # FromSubcmd() reads opt.ignore_hooks.
        group.add_option('--ignore-hooks',
                         dest='ignore_hooks', action='store_true',
                         help='Do not abort if %s hooks fail.' % name)