Blame - hooks.py - tools/repo

blob: 177bc88b97a0e7bb97acd89a0811cc72b4758d5f [file] [log] [blame]

Remy Bohmer	16c1328	2020-09-10 10:38:04 +0200	[diff] [blame]	1	# -*- coding:utf-8 -*-
				2	#
				3	# Copyright (C) 2008 The Android Open Source Project
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
import errno
import json
import os
import re
import subprocess
import sys
import traceback

from error import HookError
from git_refs import HEAD

from pyversion import is_python3
if is_python3():
  import urllib.parse
else:
  import imp
  import urlparse
  urllib = imp.new_module('urllib')
  urllib.parse = urlparse
  input = raw_input  # noqa: F821
				35
				36	class RepoHook(object):
				37	"""A RepoHook contains information about a script to run as a hook.
				38
				39	Hooks are used to run a python script before running an upload (for instance,
				40	to run presubmit checks). Eventually, we may have hooks for other actions.
				41
				42	This shouldn't be confused with files in the 'repo/hooks' directory. Those
				43	files are copied into each '.git/hooks' folder for each project. Repo-level
				44	hooks are associated instead with repo actions.
				45
				46	Hooks are always python. When a hook is run, we will load the hook into the
				47	interpreter and execute its main() function.
				48	"""
				49
				50	def __init__(self,
				51	hook_type,
				52	hooks_project,
				53	topdir,
				54	manifest_url,
				55	abort_if_user_denies=False):
				56	"""RepoHook constructor.
				57
				58	Params:
				59	hook_type: A string representing the type of hook. This is also used
				60	to figure out the name of the file containing the hook. For
				61	example: 'pre-upload'.
				62	hooks_project: The project containing the repo hooks. If you have a
				63	manifest, this is manifest.repo_hooks_project. OK if this is None,
				64	which will make the hook a no-op.
				65	topdir: Repo's top directory (the one containing the .repo directory).
				66	Scripts will run with CWD as this directory. If you have a manifest,
				67	this is manifest.topdir
				68	manifest_url: The URL to the manifest git repo.
				69	abort_if_user_denies: If True, we'll throw a HookError() if the user
				70	doesn't allow us to run the hook.
				71	"""
				72	self._hook_type = hook_type
				73	self._hooks_project = hooks_project
				74	self._manifest_url = manifest_url
				75	self._topdir = topdir
				76	self._abort_if_user_denies = abort_if_user_denies
				77
				78	# Store the full path to the script for convenience.
				79	if self._hooks_project:
				80	self._script_fullpath = os.path.join(self._hooks_project.worktree,
				81	self._hook_type + '.py')
				82	else:
				83	self._script_fullpath = None
				84
				85	def _GetHash(self):
				86	"""Return a hash of the contents of the hooks directory.
				87
				88	We'll just use git to do this. This hash has the property that if anything
				89	changes in the directory we will return a different has.
				90
				91	SECURITY CONSIDERATION:
				92	This hash only represents the contents of files in the hook directory, not
				93	any other files imported or called by hooks. Changes to imported files
				94	can change the script behavior without affecting the hash.
				95
				96	Returns:
				97	A string representing the hash. This will always be ASCII so that it can
				98	be printed to the user easily.
				99	"""
				100	assert self._hooks_project, "Must have hooks to calculate their hash."
				101
				102	# We will use the work_git object rather than just calling GetRevisionId().
				103	# That gives us a hash of the latest checked in version of the files that
				104	# the user will actually be executing. Specifically, GetRevisionId()
				105	# doesn't appear to change even if a user checks out a different version
				106	# of the hooks repo (via git checkout) nor if a user commits their own revs.
				107	#
				108	# NOTE: Local (non-committed) changes will not be factored into this hash.
				109	# I think this is OK, since we're really only worried about warning the user
				110	# about upstream changes.
				111	return self._hooks_project.work_git.rev_parse('HEAD')
				112
				113	def _GetMustVerb(self):
				114	"""Return 'must' if the hook is required; 'should' if not."""
				115	if self._abort_if_user_denies:
				116	return 'must'
				117	else:
				118	return 'should'
				119
				120	def _CheckForHookApproval(self):
				121	"""Check to see whether this hook has been approved.
				122
				123	We'll accept approval of manifest URLs if they're using secure transports.
				124	This way the user can say they trust the manifest hoster. For insecure
				125	hosts, we fall back to checking the hash of the hooks repo.
				126
				127	Note that we ask permission for each individual hook even though we use
				128	the hash of all hooks when detecting changes. We'd like the user to be
				129	able to approve / deny each hook individually. We only use the hash of all
				130	hooks because there is no other easy way to detect changes to local imports.
				131
				132	Returns:
				133	True if this hook is approved to run; False otherwise.
				134
				135	Raises:
				136	HookError: Raised if the user doesn't approve and abort_if_user_denies
				137	was passed to the consturctor.
				138	"""
				139	if self._ManifestUrlHasSecureScheme():
				140	return self._CheckForHookApprovalManifest()
				141	else:
				142	return self._CheckForHookApprovalHash()
				143
				144	def _CheckForHookApprovalHelper(self, subkey, new_val, main_prompt,
				145	changed_prompt):
				146	"""Check for approval for a particular attribute and hook.
				147
				148	Args:
				149	subkey: The git config key under [repo.hooks.<hook_type>] to store the
				150	last approved string.
				151	new_val: The new value to compare against the last approved one.
				152	main_prompt: Message to display to the user to ask for approval.
				153	changed_prompt: Message explaining why we're re-asking for approval.
				154
				155	Returns:
				156	True if this hook is approved to run; False otherwise.
				157
				158	Raises:
				159	HookError: Raised if the user doesn't approve and abort_if_user_denies
				160	was passed to the consturctor.
				161	"""
				162	hooks_config = self._hooks_project.config
				163	git_approval_key = 'repo.hooks.%s.%s' % (self._hook_type, subkey)
				164
				165	# Get the last value that the user approved for this hook; may be None.
				166	old_val = hooks_config.GetString(git_approval_key)
				167
				168	if old_val is not None:
				169	# User previously approved hook and asked not to be prompted again.
				170	if new_val == old_val:
				171	# Approval matched. We're done.
				172	return True
				173	else:
				174	# Give the user a reason why we're prompting, since they last told
				175	# us to "never ask again".
				176	prompt = 'WARNING: %s\n\n' % (changed_prompt,)
				177	else:
				178	prompt = ''
				179
				180	# Prompt the user if we're not on a tty; on a tty we'll assume "no".
				181	if sys.stdout.isatty():
				182	prompt += main_prompt + ' (yes/always/NO)? '
				183	response = input(prompt).lower()
				184	print()
				185
				186	# User is doing a one-time approval.
				187	if response in ('y', 'yes'):
				188	return True
				189	elif response == 'always':
				190	hooks_config.SetString(git_approval_key, new_val)
				191	return True
				192
				193	# For anything else, we'll assume no approval.
				194	if self._abort_if_user_denies:
				195	raise HookError('You must allow the %s hook or use --no-verify.' %
				196	self._hook_type)
				197
				198	return False
				199
				200	def _ManifestUrlHasSecureScheme(self):
				201	"""Check if the URI for the manifest is a secure transport."""
				202	secure_schemes = ('file', 'https', 'ssh', 'persistent-https', 'sso', 'rpc')
				203	parse_results = urllib.parse.urlparse(self._manifest_url)
				204	return parse_results.scheme in secure_schemes
				205
				206	def _CheckForHookApprovalManifest(self):
				207	"""Check whether the user has approved this manifest host.
				208
				209	Returns:
				210	True if this hook is approved to run; False otherwise.
				211	"""
				212	return self._CheckForHookApprovalHelper(
				213	'approvedmanifest',
				214	self._manifest_url,
				215	'Run hook scripts from %s' % (self._manifest_url,),
				216	'Manifest URL has changed since %s was allowed.' % (self._hook_type,))
				217
				218	def _CheckForHookApprovalHash(self):
				219	"""Check whether the user has approved the hooks repo.
				220
				221	Returns:
				222	True if this hook is approved to run; False otherwise.
				223	"""
				224	prompt = ('Repo %s run the script:\n'
				225	' %s\n'
				226	'\n'
				227	'Do you want to allow this script to run')
				228	return self._CheckForHookApprovalHelper(
				229	'approvedhash',
				230	self._GetHash(),
				231	prompt % (self._GetMustVerb(), self._script_fullpath),
				232	'Scripts have changed since %s was allowed.' % (self._hook_type,))
				233
				234	@staticmethod
				235	def _ExtractInterpFromShebang(data):
				236	"""Extract the interpreter used in the shebang.
				237
				238	Try to locate the interpreter the script is using (ignoring `env`).
				239
				240	Args:
				241	data: The file content of the script.
				242
				243	Returns:
				244	The basename of the main script interpreter, or None if a shebang is not
				245	used or could not be parsed out.
				246	"""
				247	firstline = data.splitlines()[:1]
				248	if not firstline:
				249	return None
				250
				251	# The format here can be tricky.
				252	shebang = firstline[0].strip()
				253	m = re.match(r'^#!\s*([^\s]+)(?:\s+([^\s]+))?', shebang)
				254	if not m:
				255	return None
				256
				257	# If the using `env`, find the target program.
				258	interp = m.group(1)
				259	if os.path.basename(interp) == 'env':
				260	interp = m.group(2)
				261
				262	return interp
				263
				264	def _ExecuteHookViaReexec(self, interp, context, **kwargs):
				265	"""Execute the hook script through \|interp\|.
				266
				267	Note: Support for this feature should be dropped ~Jun 2021.
				268
				269	Args:
				270	interp: The Python program to run.
				271	context: Basic Python context to execute the hook inside.
				272	kwargs: Arbitrary arguments to pass to the hook script.
				273
				274	Raises:
				275	HookError: When the hooks failed for any reason.
				276	"""
				277	# This logic needs to be kept in sync with _ExecuteHookViaImport below.
				278	script = """
				279	import json, os, sys
				280	path = '''%(path)s'''
				281	kwargs = json.loads('''%(kwargs)s''')
				282	context = json.loads('''%(context)s''')
				283	sys.path.insert(0, os.path.dirname(path))
				284	data = open(path).read()
				285	exec(compile(data, path, 'exec'), context)
				286	context['main'](**kwargs)
				287	""" % {
				288	'path': self._script_fullpath,
				289	'kwargs': json.dumps(kwargs),
				290	'context': json.dumps(context),
				291	}
				292
				293	# We pass the script via stdin to avoid OS argv limits. It also makes
				294	# unhandled exception tracebacks less verbose/confusing for users.
				295	cmd = [interp, '-c', 'import sys; exec(sys.stdin.read())']
				296	proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
				297	proc.communicate(input=script.encode('utf-8'))
				298	if proc.returncode:
				299	raise HookError('Failed to run %s hook.' % (self._hook_type,))
				300
				301	def _ExecuteHookViaImport(self, data, context, **kwargs):
				302	"""Execute the hook code in \|data\| directly.
				303
				304	Args:
				305	data: The code of the hook to execute.
				306	context: Basic Python context to execute the hook inside.
				307	kwargs: Arbitrary arguments to pass to the hook script.
				308
				309	Raises:
				310	HookError: When the hooks failed for any reason.
				311	"""
				312	# Exec, storing global context in the context dict. We catch exceptions
				313	# and convert to a HookError w/ just the failing traceback.
				314	try:
				315	exec(compile(data, self._script_fullpath, 'exec'), context)
				316	except Exception:
				317	raise HookError('%s\nFailed to import %s hook; see traceback above.' %
				318	(traceback.format_exc(), self._hook_type))
				319
				320	# Running the script should have defined a main() function.
				321	if 'main' not in context:
				322	raise HookError('Missing main() in: "%s"' % self._script_fullpath)
				323
				324	# Call the main function in the hook. If the hook should cause the
				325	# build to fail, it will raise an Exception. We'll catch that convert
				326	# to a HookError w/ just the failing traceback.
				327	try:
				328	context['main'](**kwargs)
				329	except Exception:
				330	raise HookError('%s\nFailed to run main() for %s hook; see traceback '
				331	'above.' % (traceback.format_exc(), self._hook_type))
				332
				333	def _ExecuteHook(self, **kwargs):
				334	"""Actually execute the given hook.
				335
				336	This will run the hook's 'main' function in our python interpreter.
				337
				338	Args:
				339	kwargs: Keyword arguments to pass to the hook. These are often specific
				340	to the hook type. For instance, pre-upload hooks will contain
				341	a project_list.
				342	"""
				343	# Keep sys.path and CWD stashed away so that we can always restore them
				344	# upon function exit.
				345	orig_path = os.getcwd()
				346	orig_syspath = sys.path
				347
				348	try:
				349	# Always run hooks with CWD as topdir.
				350	os.chdir(self._topdir)
				351
				352	# Put the hook dir as the first item of sys.path so hooks can do
				353	# relative imports. We want to replace the repo dir as [0] so
				354	# hooks can't import repo files.
				355	sys.path = [os.path.dirname(self._script_fullpath)] + sys.path[1:]
				356
				357	# Initial global context for the hook to run within.
				358	context = {'__file__': self._script_fullpath}
				359
				360	# Add 'hook_should_take_kwargs' to the arguments to be passed to main.
				361	# We don't actually want hooks to define their main with this argument--
				362	# it's there to remind them that their hook should always take **kwargs.
				363	# For instance, a pre-upload hook should be defined like:
				364	# def main(project_list, **kwargs):
				365	#
				366	# This allows us to later expand the API without breaking old hooks.
				367	kwargs = kwargs.copy()
				368	kwargs['hook_should_take_kwargs'] = True
				369
				370	# See what version of python the hook has been written against.
				371	data = open(self._script_fullpath).read()
				372	interp = self._ExtractInterpFromShebang(data)
				373	reexec = False
				374	if interp:
				375	prog = os.path.basename(interp)
				376	if prog.startswith('python2') and sys.version_info.major != 2:
				377	reexec = True
				378	elif prog.startswith('python3') and sys.version_info.major == 2:
				379	reexec = True
				380
				381	# Attempt to execute the hooks through the requested version of Python.
				382	if reexec:
				383	try:
				384	self._ExecuteHookViaReexec(interp, context, **kwargs)
				385	except OSError as e:
				386	if e.errno == errno.ENOENT:
				387	# We couldn't find the interpreter, so fallback to importing.
				388	reexec = False
				389	else:
				390	raise
				391
				392	# Run the hook by importing directly.
				393	if not reexec:
				394	self._ExecuteHookViaImport(data, context, **kwargs)
				395	finally:
				396	# Restore sys.path and CWD.
				397	sys.path = orig_syspath
				398	os.chdir(orig_path)
				399
				400	def Run(self, user_allows_all_hooks, **kwargs):
				401	"""Run the hook.
				402
				403	If the hook doesn't exist (because there is no hooks project or because
				404	this particular hook is not enabled), this is a no-op.
				405
				406	Args:
				407	user_allows_all_hooks: If True, we will never prompt about running the
				408	hook--we'll just assume it's OK to run it.
				409	kwargs: Keyword arguments to pass to the hook. These are often specific
				410	to the hook type. For instance, pre-upload hooks will contain
				411	a project_list.
				412
				413	Raises:
				414	HookError: If there was a problem finding the hook or the user declined
				415	to run a required hook (from _CheckForHookApproval).
				416	"""
				417	# No-op if there is no hooks project or if hook is disabled.
				418	if ((not self._hooks_project) or (self._hook_type not in
				419	self._hooks_project.enabled_repo_hooks)):
				420	return
				421
				422	# Bail with a nice error if we can't find the hook.
				423	if not os.path.isfile(self._script_fullpath):
				424	raise HookError('Couldn\'t find repo hook: "%s"' % self._script_fullpath)
				425
				426	# Make sure the user is OK with running the hook.
				427	if (not user_allows_all_hooks) and (not self._CheckForHookApproval()):
				428	return
				429
				430	# Run the hook with the same version of python we're using.
				431	self._ExecuteHook(**kwargs)